program(1.3) [buildInfo = dict({{"coremlc-component-MIL", "3405.2.1"}, {"coremlc-version", "3404.23.1"}, {"coremltools-component-torch", "2.2.2"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.3.0"}})] { func main(tensor causal_mask, tensor input_ids, state> key_cache, state> value_cache) [FlexibleShapeInformation = tuple>>, tuple, ?>>>>((("DefaultShapes", {{"causal_mask", [1, 1, 1, 1]}, {"input_ids", [1, 1]}}), ("RangeDims", {{"causal_mask", [[1, 1], [1, 1], [1, 2048], [1, 2048]]}, {"input_ids", [[1, 1], [1, 2048]]}})))] { tensor var_7_shape_cast_fp16 = shape(x = causal_mask)[name = string("op_7_shape_cast_fp16")]; int32 gather_0_axis_0 = const()[name = string("gather_0_axis_0"), val = int32(0)]; int32 gather_0_batch_dims_0 = const()[name = string("gather_0_batch_dims_0"), val = int32(0)]; bool gather_0_validate_indices_0 = const()[name = string("gather_0_validate_indices_0"), val = bool(false)]; string var_7_shape_cast_fp16_to_int16_dtype_0 = const()[name = string("op_7_shape_cast_fp16_to_int16_dtype_0"), val = string("int16")]; uint16 select_0_to_uint16 = const()[name = string("select_0_to_uint16"), val = uint16(3)]; tensor var_7_shape_cast_fp16_to_int16 = cast(dtype = var_7_shape_cast_fp16_to_int16_dtype_0, x = var_7_shape_cast_fp16)[name = string("cast_747")]; int16 gather_0_cast_uint16 = gather(axis = gather_0_axis_0, batch_dims = gather_0_batch_dims_0, indices = select_0_to_uint16, validate_indices = gather_0_validate_indices_0, x = var_7_shape_cast_fp16_to_int16)[name = string("gather_0_cast_uint16")]; string gather_0_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_0_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor var_10_shape = shape(x = input_ids)[name = string("op_10_shape")]; int32 gather_1_axis_0 = const()[name = string("gather_1_axis_0"), val = int32(0)]; int32 gather_1_batch_dims_0 = const()[name = string("gather_1_batch_dims_0"), val = int32(0)]; bool gather_1_validate_indices_0 = const()[name = string("gather_1_validate_indices_0"), val = bool(false)]; string var_10_shape_to_uint16_dtype_0 = const()[name = string("op_10_shape_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_1_to_uint16 = const()[name = string("select_1_to_uint16"), val = uint16(1)]; tensor var_10_shape_to_uint16 = cast(dtype = var_10_shape_to_uint16_dtype_0, x = var_10_shape)[name = string("cast_745")]; uint16 gather_1_cast_uint16 = gather(axis = gather_1_axis_0, batch_dims = gather_1_batch_dims_0, indices = select_1_to_uint16, validate_indices = gather_1_validate_indices_0, x = var_10_shape_to_uint16)[name = string("gather_1_cast_uint16")]; string gather_1_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_1_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_1_cast_uint16_to_int32 = cast(dtype = gather_1_cast_uint16_to_int32_dtype_0, x = gather_1_cast_uint16)[name = string("cast_744")]; int32 gather_0_cast_uint16_to_int32 = cast(dtype = gather_0_cast_uint16_to_int32_dtype_0, x = gather_0_cast_uint16)[name = string("cast_746")]; int32 past_seen_tokens = sub(x = gather_0_cast_uint16_to_int32, y = gather_1_cast_uint16_to_int32)[name = string("past_seen_tokens")]; int32 var_72 = const()[name = string("op_72"), val = int32(-1)]; int32 var_78 = const()[name = string("op_78"), val = int32(2)]; int32 inputs_embeds_axis_0 = const()[name = string("inputs_embeds_axis_0"), val = int32(0)]; int32 inputs_embeds_batch_dims_0 = const()[name = string("inputs_embeds_batch_dims_0"), val = int32(0)]; bool inputs_embeds_validate_indices_0 = const()[name = string("inputs_embeds_validate_indices_0"), val = bool(false)]; tensor model_model_embed_tokens_weight_to_fp16 = const()[name = string("model_model_embed_tokens_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(64)))]; tensor inputs_embeds_cast_fp16 = gather(axis = inputs_embeds_axis_0, batch_dims = inputs_embeds_batch_dims_0, indices = input_ids, validate_indices = inputs_embeds_validate_indices_0, x = model_model_embed_tokens_weight_to_fp16)[name = string("inputs_embeds_cast_fp16")]; tensor var_174_shape_cast_fp16 = shape(x = inputs_embeds_cast_fp16)[name = string("op_174_shape_cast_fp16")]; int32 gather_2_axis_0 = const()[name = string("gather_2_axis_0"), val = int32(0)]; int32 gather_2_batch_dims_0 = const()[name = string("gather_2_batch_dims_0"), val = int32(0)]; bool gather_2_validate_indices_0 = const()[name = string("gather_2_validate_indices_0"), val = bool(false)]; string var_174_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_174_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_2_to_uint16 = const()[name = string("select_2_to_uint16"), val = uint16(1)]; tensor var_174_shape_cast_fp16_to_uint16 = cast(dtype = var_174_shape_cast_fp16_to_uint16_dtype_0, x = var_174_shape_cast_fp16)[name = string("cast_743")]; uint16 gather_2_cast_uint16 = gather(axis = gather_2_axis_0, batch_dims = gather_2_batch_dims_0, indices = select_2_to_uint16, validate_indices = gather_2_validate_indices_0, x = var_174_shape_cast_fp16_to_uint16)[name = string("gather_2_cast_uint16")]; string gather_2_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_2_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_2_cast_uint16_to_int32 = cast(dtype = gather_2_cast_uint16_to_int32_dtype_0, x = gather_2_cast_uint16)[name = string("cast_742")]; int32 var_176 = add(x = past_seen_tokens, y = gather_2_cast_uint16_to_int32)[name = string("op_176")]; int32 const_0 = const()[name = string("const_0"), val = int32(1)]; tensor cache_position = range_1d(end = var_176, start = past_seen_tokens, step = const_0)[name = string("cache_position")]; tensor position_ids_axes_0 = const()[name = string("position_ids_axes_0"), val = tensor([0])]; tensor position_ids = expand_dims(axes = position_ids_axes_0, x = cache_position)[name = string("position_ids")]; tensor var_190_axes_0 = const()[name = string("op_190_axes_0"), val = tensor([1])]; tensor var_190 = expand_dims(axes = var_190_axes_0, x = position_ids)[name = string("op_190")]; bool var_195_transpose_x_0 = const()[name = string("op_195_transpose_x_0"), val = bool(false)]; bool var_195_transpose_y_0 = const()[name = string("op_195_transpose_y_0"), val = bool(false)]; tensor const_3_to_fp16 = const()[name = string("const_3_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311165056)))]; string cast_2_to_fp16_dtype_0 = const()[name = string("cast_2_to_fp16_dtype_0"), val = string("fp16")]; tensor var_190_to_fp16 = cast(dtype = cast_2_to_fp16_dtype_0, x = var_190)[name = string("cast_741")]; tensor var_195_cast_fp16 = matmul(transpose_x = var_195_transpose_x_0, transpose_y = var_195_transpose_y_0, x = const_3_to_fp16, y = var_190_to_fp16)[name = string("op_195_cast_fp16")]; tensor freqs_perm_0 = const()[name = string("freqs_perm_0"), val = tensor([0, 2, 1])]; bool emb_interleave_0 = const()[name = string("emb_interleave_0"), val = bool(false)]; tensor freqs_cast_fp16 = transpose(perm = freqs_perm_0, x = var_195_cast_fp16)[name = string("transpose_112")]; tensor emb_cast_fp16 = concat(axis = var_72, interleave = emb_interleave_0, values = (freqs_cast_fp16, freqs_cast_fp16))[name = string("emb_cast_fp16")]; tensor var_199_cast_fp16 = cos(x = emb_cast_fp16)[name = string("op_199_cast_fp16")]; tensor var_202_cast_fp16 = sin(x = emb_cast_fp16)[name = string("op_202_cast_fp16")]; fp16 var_78_promoted_to_fp16 = const()[name = string("op_78_promoted_to_fp16"), val = fp16(0x1p+1)]; tensor var_216_cast_fp16 = pow(x = inputs_embeds_cast_fp16, y = var_78_promoted_to_fp16)[name = string("op_216_cast_fp16")]; tensor variance_1_axes_0 = const()[name = string("variance_1_axes_0"), val = tensor([-1])]; bool variance_1_keep_dims_0 = const()[name = string("variance_1_keep_dims_0"), val = bool(true)]; tensor variance_1_cast_fp16 = reduce_mean(axes = variance_1_axes_0, keep_dims = variance_1_keep_dims_0, x = var_216_cast_fp16)[name = string("variance_1_cast_fp16")]; fp16 var_219_to_fp16 = const()[name = string("op_219_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_220_cast_fp16 = add(x = variance_1_cast_fp16, y = var_219_to_fp16)[name = string("op_220_cast_fp16")]; fp32 var_221_epsilon_0 = const()[name = string("op_221_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_221_cast_fp16 = rsqrt(epsilon = var_221_epsilon_0, x = var_220_cast_fp16)[name = string("op_221_cast_fp16")]; tensor hidden_states_3_cast_fp16 = mul(x = inputs_embeds_cast_fp16, y = var_221_cast_fp16)[name = string("hidden_states_3_cast_fp16")]; tensor model_model_layers_0_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311165248)))]; tensor hidden_states_7_cast_fp16 = mul(x = model_model_layers_0_input_layernorm_weight_to_fp16, y = hidden_states_3_cast_fp16)[name = string("hidden_states_7_cast_fp16")]; tensor var_234_shape_cast_fp16 = shape(x = hidden_states_7_cast_fp16)[name = string("op_234_shape_cast_fp16")]; int32 gather_4 = const()[name = string("gather_4"), val = int32(1)]; int32 gather_5_axis_0 = const()[name = string("gather_5_axis_0"), val = int32(0)]; int32 gather_5_batch_dims_0 = const()[name = string("gather_5_batch_dims_0"), val = int32(0)]; bool gather_5_validate_indices_0 = const()[name = string("gather_5_validate_indices_0"), val = bool(false)]; string var_234_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_234_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_5_to_uint16 = const()[name = string("select_5_to_uint16"), val = uint16(1)]; tensor var_234_shape_cast_fp16_to_uint16 = cast(dtype = var_234_shape_cast_fp16_to_uint16_dtype_0, x = var_234_shape_cast_fp16)[name = string("cast_740")]; uint16 gather_5_cast_uint16 = gather(axis = gather_5_axis_0, batch_dims = gather_5_batch_dims_0, indices = select_5_to_uint16, validate_indices = gather_5_validate_indices_0, x = var_234_shape_cast_fp16_to_uint16)[name = string("gather_5_cast_uint16")]; string gather_5_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_5_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_0_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_0_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(311167360)))]; tensor linear_0_bias_0_to_fp16 = const()[name = string("linear_0_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315361728)))]; tensor linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_0_self_attn_q_proj_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = string("linear_0_cast_fp16")]; tensor concat_1x = const()[name = string("concat_1x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_9_cast_fp16 = reshape(shape = concat_1x, x = linear_0_cast_fp16)[name = string("hidden_states_9_cast_fp16")]; fp16 var_78_promoted_1_to_fp16 = const()[name = string("op_78_promoted_1_to_fp16"), val = fp16(0x1p+1)]; tensor var_242_cast_fp16 = pow(x = hidden_states_9_cast_fp16, y = var_78_promoted_1_to_fp16)[name = string("op_242_cast_fp16")]; tensor variance_3_axes_0 = const()[name = string("variance_3_axes_0"), val = tensor([-1])]; bool variance_3_keep_dims_0 = const()[name = string("variance_3_keep_dims_0"), val = bool(true)]; tensor variance_3_cast_fp16 = reduce_mean(axes = variance_3_axes_0, keep_dims = variance_3_keep_dims_0, x = var_242_cast_fp16)[name = string("variance_3_cast_fp16")]; fp16 var_245_to_fp16 = const()[name = string("op_245_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_246_cast_fp16 = add(x = variance_3_cast_fp16, y = var_245_to_fp16)[name = string("op_246_cast_fp16")]; fp32 var_247_epsilon_0 = const()[name = string("op_247_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_247_cast_fp16 = rsqrt(epsilon = var_247_epsilon_0, x = var_246_cast_fp16)[name = string("op_247_cast_fp16")]; tensor hidden_states_13_cast_fp16 = mul(x = hidden_states_9_cast_fp16, y = var_247_cast_fp16)[name = string("hidden_states_13_cast_fp16")]; tensor model_model_layers_0_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_0_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315365888)))]; tensor var_250_cast_fp16 = mul(x = model_model_layers_0_self_attn_q_norm_weight_to_fp16, y = hidden_states_13_cast_fp16)[name = string("op_250_cast_fp16")]; tensor q_1_perm_0 = const()[name = string("q_1_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_0_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_0_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(315366208)))]; tensor linear_1_bias_0_to_fp16 = const()[name = string("linear_1_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317463424)))]; tensor linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_0_self_attn_k_proj_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = string("linear_1_cast_fp16")]; tensor concat_2x = const()[name = string("concat_2x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_17_cast_fp16 = reshape(shape = concat_2x, x = linear_1_cast_fp16)[name = string("hidden_states_17_cast_fp16")]; fp16 var_78_promoted_2_to_fp16 = const()[name = string("op_78_promoted_2_to_fp16"), val = fp16(0x1p+1)]; tensor var_258_cast_fp16 = pow(x = hidden_states_17_cast_fp16, y = var_78_promoted_2_to_fp16)[name = string("op_258_cast_fp16")]; tensor variance_5_axes_0 = const()[name = string("variance_5_axes_0"), val = tensor([-1])]; bool variance_5_keep_dims_0 = const()[name = string("variance_5_keep_dims_0"), val = bool(true)]; tensor variance_5_cast_fp16 = reduce_mean(axes = variance_5_axes_0, keep_dims = variance_5_keep_dims_0, x = var_258_cast_fp16)[name = string("variance_5_cast_fp16")]; fp16 var_261_to_fp16 = const()[name = string("op_261_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_262_cast_fp16 = add(x = variance_5_cast_fp16, y = var_261_to_fp16)[name = string("op_262_cast_fp16")]; fp32 var_263_epsilon_0 = const()[name = string("op_263_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_263_cast_fp16 = rsqrt(epsilon = var_263_epsilon_0, x = var_262_cast_fp16)[name = string("op_263_cast_fp16")]; tensor hidden_states_21_cast_fp16 = mul(x = hidden_states_17_cast_fp16, y = var_263_cast_fp16)[name = string("hidden_states_21_cast_fp16")]; tensor model_model_layers_0_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_0_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317465536)))]; tensor var_266_cast_fp16 = mul(x = model_model_layers_0_self_attn_k_norm_weight_to_fp16, y = hidden_states_21_cast_fp16)[name = string("op_266_cast_fp16")]; tensor k_1_perm_0 = const()[name = string("k_1_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_0_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_0_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(317465856)))]; tensor linear_2_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_0_self_attn_v_proj_weight_to_fp16, x = hidden_states_7_cast_fp16)[name = string("linear_2_cast_fp16")]; tensor concat_3x = const()[name = string("concat_3x"), val = tensor([1, -1, 8, 128])]; tensor var_271_cast_fp16 = reshape(shape = concat_3x, x = linear_2_cast_fp16)[name = string("op_271_cast_fp16")]; tensor v_state_1_perm_0 = const()[name = string("v_state_1_perm_0"), val = tensor([0, 2, 1, 3])]; tensor cos_5_axes_0 = const()[name = string("cos_5_axes_0"), val = tensor([1])]; tensor cos_5_cast_fp16 = expand_dims(axes = cos_5_axes_0, x = var_199_cast_fp16)[name = string("cos_5_cast_fp16")]; tensor sin_5_axes_0 = const()[name = string("sin_5_axes_0"), val = tensor([1])]; tensor sin_5_cast_fp16 = expand_dims(axes = sin_5_axes_0, x = var_202_cast_fp16)[name = string("sin_5_cast_fp16")]; tensor q_1_cast_fp16 = transpose(perm = q_1_perm_0, x = var_250_cast_fp16)[name = string("transpose_111")]; tensor var_275_cast_fp16 = mul(x = q_1_cast_fp16, y = cos_5_cast_fp16)[name = string("op_275_cast_fp16")]; tensor x1_1_begin_0 = const()[name = string("x1_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_1_end_0 = const()[name = string("x1_1_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_1_end_mask_0 = const()[name = string("x1_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_1_cast_fp16 = slice_by_index(begin = x1_1_begin_0, end = x1_1_end_0, end_mask = x1_1_end_mask_0, x = q_1_cast_fp16)[name = string("x1_1_cast_fp16")]; tensor x2_1_begin_0 = const()[name = string("x2_1_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_1_end_0 = const()[name = string("x2_1_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_1_end_mask_0 = const()[name = string("x2_1_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_1_cast_fp16 = slice_by_index(begin = x2_1_begin_0, end = x2_1_end_0, end_mask = x2_1_end_mask_0, x = q_1_cast_fp16)[name = string("x2_1_cast_fp16")]; fp16 const_4_promoted_to_fp16 = const()[name = string("const_4_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_286_cast_fp16 = mul(x = x2_1_cast_fp16, y = const_4_promoted_to_fp16)[name = string("op_286_cast_fp16")]; bool var_288_interleave_0 = const()[name = string("op_288_interleave_0"), val = bool(false)]; tensor var_288_cast_fp16 = concat(axis = var_72, interleave = var_288_interleave_0, values = (var_286_cast_fp16, x1_1_cast_fp16))[name = string("op_288_cast_fp16")]; tensor var_289_cast_fp16 = mul(x = var_288_cast_fp16, y = sin_5_cast_fp16)[name = string("op_289_cast_fp16")]; tensor query_1_cast_fp16 = add(x = var_275_cast_fp16, y = var_289_cast_fp16)[name = string("query_1_cast_fp16")]; tensor k_1_cast_fp16 = transpose(perm = k_1_perm_0, x = var_266_cast_fp16)[name = string("transpose_110")]; tensor var_291_cast_fp16 = mul(x = k_1_cast_fp16, y = cos_5_cast_fp16)[name = string("op_291_cast_fp16")]; tensor x1_3_begin_0 = const()[name = string("x1_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_3_end_0 = const()[name = string("x1_3_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_3_end_mask_0 = const()[name = string("x1_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_3_cast_fp16 = slice_by_index(begin = x1_3_begin_0, end = x1_3_end_0, end_mask = x1_3_end_mask_0, x = k_1_cast_fp16)[name = string("x1_3_cast_fp16")]; tensor x2_3_begin_0 = const()[name = string("x2_3_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_3_end_0 = const()[name = string("x2_3_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_3_end_mask_0 = const()[name = string("x2_3_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_3_cast_fp16 = slice_by_index(begin = x2_3_begin_0, end = x2_3_end_0, end_mask = x2_3_end_mask_0, x = k_1_cast_fp16)[name = string("x2_3_cast_fp16")]; fp16 const_5_promoted_to_fp16 = const()[name = string("const_5_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_302_cast_fp16 = mul(x = x2_3_cast_fp16, y = const_5_promoted_to_fp16)[name = string("op_302_cast_fp16")]; bool var_304_interleave_0 = const()[name = string("op_304_interleave_0"), val = bool(false)]; tensor var_304_cast_fp16 = concat(axis = var_72, interleave = var_304_interleave_0, values = (var_302_cast_fp16, x1_3_cast_fp16))[name = string("op_304_cast_fp16")]; tensor var_305_cast_fp16 = mul(x = var_304_cast_fp16, y = sin_5_cast_fp16)[name = string("op_305_cast_fp16")]; tensor k_state_1_cast_fp16 = add(x = var_291_cast_fp16, y = var_305_cast_fp16)[name = string("k_state_1_cast_fp16")]; tensor var_307_shape = shape(x = cache_position)[name = string("op_307_shape")]; int32 gather_10_axis_0 = const()[name = string("gather_10_axis_0"), val = int32(0)]; int32 gather_10_batch_dims_0 = const()[name = string("gather_10_batch_dims_0"), val = int32(0)]; bool gather_10_validate_indices_0 = const()[name = string("gather_10_validate_indices_0"), val = bool(false)]; string var_307_shape_to_uint16_dtype_0 = const()[name = string("op_307_shape_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_10_to_uint16 = const()[name = string("select_10_to_uint16"), val = uint16(0)]; tensor var_307_shape_to_uint16 = cast(dtype = var_307_shape_to_uint16_dtype_0, x = var_307_shape)[name = string("cast_738")]; uint16 gather_10_cast_uint16 = gather(axis = gather_10_axis_0, batch_dims = gather_10_batch_dims_0, indices = select_10_to_uint16, validate_indices = gather_10_validate_indices_0, x = var_307_shape_to_uint16)[name = string("gather_10_cast_uint16")]; string gather_10_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_10_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_10_cast_uint16_to_int32 = cast(dtype = gather_10_cast_uint16_to_int32_dtype_0, x = gather_10_cast_uint16)[name = string("cast_737")]; int32 end_1 = add(x = past_seen_tokens, y = gather_10_cast_uint16_to_int32)[name = string("end_1")]; tensor read_state_0 = read_state(input = key_cache)[name = string("read_state_0")]; tensor expand_dims_0 = const()[name = string("expand_dims_0"), val = tensor([0])]; tensor expand_dims_1 = const()[name = string("expand_dims_1"), val = tensor([0])]; tensor expand_dims_2_axes_0 = const()[name = string("expand_dims_2_axes_0"), val = tensor([0])]; tensor expand_dims_2 = expand_dims(axes = expand_dims_2_axes_0, x = past_seen_tokens)[name = string("expand_dims_2")]; tensor expand_dims_3 = const()[name = string("expand_dims_3"), val = tensor([0])]; tensor expand_dims_4 = const()[name = string("expand_dims_4"), val = tensor([8])]; tensor expand_dims_5_axes_0 = const()[name = string("expand_dims_5_axes_0"), val = tensor([0])]; tensor expand_dims_5 = expand_dims(axes = expand_dims_5_axes_0, x = end_1)[name = string("expand_dims_5")]; tensor concat_6_values0_0 = const()[name = string("concat_6_values0_0"), val = tensor([0])]; int32 concat_6_axis_0 = const()[name = string("concat_6_axis_0"), val = int32(0)]; bool concat_6_interleave_0 = const()[name = string("concat_6_interleave_0"), val = bool(false)]; tensor concat_6 = concat(axis = concat_6_axis_0, interleave = concat_6_interleave_0, values = (concat_6_values0_0, expand_dims_0, expand_dims_1, expand_dims_2, expand_dims_3))[name = string("concat_6")]; tensor concat_7_values0_0 = const()[name = string("concat_7_values0_0"), val = tensor([0])]; tensor concat_7_values1_0 = const()[name = string("concat_7_values1_0"), val = tensor([0])]; tensor concat_7_values4_0 = const()[name = string("concat_7_values4_0"), val = tensor([0])]; int32 concat_7_axis_0 = const()[name = string("concat_7_axis_0"), val = int32(0)]; bool concat_7_interleave_0 = const()[name = string("concat_7_interleave_0"), val = bool(false)]; tensor concat_7 = concat(axis = concat_7_axis_0, interleave = concat_7_interleave_0, values = (concat_7_values0_0, concat_7_values1_0, expand_dims_4, expand_dims_5, concat_7_values4_0))[name = string("concat_7")]; tensor key_cache_internal_tensor_assign_1_stride_0 = const()[name = string("key_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_6, begin_mask = key_cache_internal_tensor_assign_1_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_1_squeeze_mask_0, stride = key_cache_internal_tensor_assign_1_stride_0, update = k_state_1_cast_fp16, x = read_state_0)[name = string("key_cache_internal_tensor_assign_1_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_1_cast_fp16, input = key_cache)[name = string("coreml_update_state_56_write_state")]; tensor coreml_update_state_56 = read_state(input = key_cache)[name = string("coreml_update_state_56")]; tensor read_state_1 = read_state(input = value_cache)[name = string("read_state_1")]; tensor value_cache_internal_tensor_assign_1_stride_0 = const()[name = string("value_cache_internal_tensor_assign_1_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_1_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_1_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_1_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_1_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_1_cast_fp16 = transpose(perm = v_state_1_perm_0, x = var_271_cast_fp16)[name = string("transpose_109")]; tensor value_cache_internal_tensor_assign_1_cast_fp16 = slice_update(begin = concat_6, begin_mask = value_cache_internal_tensor_assign_1_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_1_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_1_squeeze_mask_0, stride = value_cache_internal_tensor_assign_1_stride_0, update = v_state_1_cast_fp16, x = read_state_1)[name = string("value_cache_internal_tensor_assign_1_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_1_cast_fp16, input = value_cache)[name = string("coreml_update_state_57_write_state")]; tensor coreml_update_state_57 = read_state(input = value_cache)[name = string("coreml_update_state_57")]; tensor var_328_begin_0 = const()[name = string("op_328_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor var_328_end_0 = const()[name = string("op_328_end_0"), val = tensor([1, 1, 8, 2048, 128])]; tensor var_328_end_mask_0 = const()[name = string("op_328_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_328_squeeze_mask_0 = const()[name = string("op_328_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_328_cast_fp16 = slice_by_index(begin = var_328_begin_0, end = var_328_end_0, end_mask = var_328_end_mask_0, squeeze_mask = var_328_squeeze_mask_0, x = coreml_update_state_56)[name = string("op_328_cast_fp16")]; int32 concat_12_values0_0 = const()[name = string("concat_12_values0_0"), val = int32(1)]; int32 concat_12_values1_0 = const()[name = string("concat_12_values1_0"), val = int32(8)]; int32 concat_12_values3_0 = const()[name = string("concat_12_values3_0"), val = int32(128)]; int32 concat_12_axis_0 = const()[name = string("concat_12_axis_0"), val = int32(0)]; bool concat_12_interleave_0 = const()[name = string("concat_12_interleave_0"), val = bool(false)]; tensor concat_12 = concat(axis = concat_12_axis_0, interleave = concat_12_interleave_0, values = (concat_12_values0_0, concat_12_values1_0, end_1, concat_12_values3_0))[name = string("concat_12")]; tensor var_331_begin_0 = const()[name = string("op_331_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_331_end_mask_0 = const()[name = string("op_331_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_331_cast_fp16 = slice_by_index(begin = var_331_begin_0, end = concat_12, end_mask = var_331_end_mask_0, x = var_328_cast_fp16)[name = string("op_331_cast_fp16")]; tensor var_333_begin_0 = const()[name = string("op_333_begin_0"), val = tensor([0, 0, 0, 0, 0])]; tensor var_333_end_0 = const()[name = string("op_333_end_0"), val = tensor([1, 1, 8, 2048, 128])]; tensor var_333_end_mask_0 = const()[name = string("op_333_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_333_squeeze_mask_0 = const()[name = string("op_333_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_333_cast_fp16 = slice_by_index(begin = var_333_begin_0, end = var_333_end_0, end_mask = var_333_end_mask_0, squeeze_mask = var_333_squeeze_mask_0, x = coreml_update_state_57)[name = string("op_333_cast_fp16")]; tensor var_336_begin_0 = const()[name = string("op_336_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_336_end_mask_0 = const()[name = string("op_336_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_336_cast_fp16 = slice_by_index(begin = var_336_begin_0, end = concat_12, end_mask = var_336_end_mask_0, x = var_333_cast_fp16)[name = string("op_336_cast_fp16")]; tensor var_338_shape_cast_fp16 = shape(x = var_331_cast_fp16)[name = string("op_338_shape_cast_fp16")]; int32 gather_13 = const()[name = string("gather_13"), val = int32(1)]; int32 gather_14 = const()[name = string("gather_14"), val = int32(8)]; int32 gather_15_axis_0 = const()[name = string("gather_15_axis_0"), val = int32(0)]; int32 gather_15_batch_dims_0 = const()[name = string("gather_15_batch_dims_0"), val = int32(0)]; bool gather_15_validate_indices_0 = const()[name = string("gather_15_validate_indices_0"), val = bool(false)]; string var_338_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_338_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_15_to_uint16 = const()[name = string("select_15_to_uint16"), val = uint16(2)]; tensor var_338_shape_cast_fp16_to_uint16 = cast(dtype = var_338_shape_cast_fp16_to_uint16_dtype_0, x = var_338_shape_cast_fp16)[name = string("cast_736")]; uint16 gather_15_cast_uint16 = gather(axis = gather_15_axis_0, batch_dims = gather_15_batch_dims_0, indices = select_15_to_uint16, validate_indices = gather_15_validate_indices_0, x = var_338_shape_cast_fp16_to_uint16)[name = string("gather_15_cast_uint16")]; string gather_15_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_15_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_16 = const()[name = string("gather_16"), val = int32(128)]; tensor var_345_axes_0 = const()[name = string("op_345_axes_0"), val = tensor([2])]; tensor var_345_cast_fp16 = expand_dims(axes = var_345_axes_0, x = var_331_cast_fp16)[name = string("op_345_cast_fp16")]; int32 concat_14_axis_0 = const()[name = string("concat_14_axis_0"), val = int32(0)]; bool concat_14_interleave_0 = const()[name = string("concat_14_interleave_0"), val = bool(false)]; int32 gather_15_cast_uint16_to_int32 = cast(dtype = gather_15_cast_uint16_to_int32_dtype_0, x = gather_15_cast_uint16)[name = string("cast_735")]; tensor concat_14 = concat(axis = concat_14_axis_0, interleave = concat_14_interleave_0, values = (gather_13, gather_14, var_78, gather_15_cast_uint16_to_int32, gather_16))[name = string("concat_14")]; tensor shape_17_cast_fp16 = shape(x = var_345_cast_fp16)[name = string("shape_17_cast_fp16")]; tensor real_div_0 = real_div(x = concat_14, y = shape_17_cast_fp16)[name = string("real_div_0")]; tensor hidden_states_27_cast_fp16 = tile(reps = real_div_0, x = var_345_cast_fp16)[name = string("hidden_states_27_cast_fp16")]; tensor concat_15x = const()[name = string("concat_15x"), val = tensor([1, 16, -1, 128])]; tensor key_1_cast_fp16 = reshape(shape = concat_15x, x = hidden_states_27_cast_fp16)[name = string("key_1_cast_fp16")]; tensor var_355_shape_cast_fp16 = shape(x = var_336_cast_fp16)[name = string("op_355_shape_cast_fp16")]; int32 gather_17 = const()[name = string("gather_17"), val = int32(1)]; int32 gather_18 = const()[name = string("gather_18"), val = int32(8)]; int32 gather_19_axis_0 = const()[name = string("gather_19_axis_0"), val = int32(0)]; int32 gather_19_batch_dims_0 = const()[name = string("gather_19_batch_dims_0"), val = int32(0)]; bool gather_19_validate_indices_0 = const()[name = string("gather_19_validate_indices_0"), val = bool(false)]; string var_355_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_355_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_19_to_uint16 = const()[name = string("select_19_to_uint16"), val = uint16(2)]; tensor var_355_shape_cast_fp16_to_uint16 = cast(dtype = var_355_shape_cast_fp16_to_uint16_dtype_0, x = var_355_shape_cast_fp16)[name = string("cast_734")]; uint16 gather_19_cast_uint16 = gather(axis = gather_19_axis_0, batch_dims = gather_19_batch_dims_0, indices = select_19_to_uint16, validate_indices = gather_19_validate_indices_0, x = var_355_shape_cast_fp16_to_uint16)[name = string("gather_19_cast_uint16")]; string gather_19_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_19_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_20 = const()[name = string("gather_20"), val = int32(128)]; tensor var_362_axes_0 = const()[name = string("op_362_axes_0"), val = tensor([2])]; tensor var_362_cast_fp16 = expand_dims(axes = var_362_axes_0, x = var_336_cast_fp16)[name = string("op_362_cast_fp16")]; int32 concat_16_axis_0 = const()[name = string("concat_16_axis_0"), val = int32(0)]; bool concat_16_interleave_0 = const()[name = string("concat_16_interleave_0"), val = bool(false)]; int32 gather_19_cast_uint16_to_int32 = cast(dtype = gather_19_cast_uint16_to_int32_dtype_0, x = gather_19_cast_uint16)[name = string("cast_733")]; tensor concat_16 = concat(axis = concat_16_axis_0, interleave = concat_16_interleave_0, values = (gather_17, gather_18, var_78, gather_19_cast_uint16_to_int32, gather_20))[name = string("concat_16")]; tensor shape_22_cast_fp16 = shape(x = var_362_cast_fp16)[name = string("shape_22_cast_fp16")]; tensor real_div_1 = real_div(x = concat_16, y = shape_22_cast_fp16)[name = string("real_div_1")]; tensor hidden_states_31_cast_fp16 = tile(reps = real_div_1, x = var_362_cast_fp16)[name = string("hidden_states_31_cast_fp16")]; tensor concat_17x = const()[name = string("concat_17x"), val = tensor([1, 16, -1, 128])]; tensor value_1_cast_fp16 = reshape(shape = concat_17x, x = hidden_states_31_cast_fp16)[name = string("value_1_cast_fp16")]; tensor var_372_shape_cast_fp16 = shape(x = key_1_cast_fp16)[name = string("op_372_shape_cast_fp16")]; int32 gather_21_axis_0 = const()[name = string("gather_21_axis_0"), val = int32(0)]; int32 gather_21_batch_dims_0 = const()[name = string("gather_21_batch_dims_0"), val = int32(0)]; bool gather_21_validate_indices_0 = const()[name = string("gather_21_validate_indices_0"), val = bool(false)]; string var_372_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_372_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_21_to_uint16 = const()[name = string("select_21_to_uint16"), val = uint16(2)]; tensor var_372_shape_cast_fp16_to_uint16 = cast(dtype = var_372_shape_cast_fp16_to_uint16_dtype_0, x = var_372_shape_cast_fp16)[name = string("cast_732")]; uint16 gather_21_cast_uint16 = gather(axis = gather_21_axis_0, batch_dims = gather_21_batch_dims_0, indices = select_21_to_uint16, validate_indices = gather_21_validate_indices_0, x = var_372_shape_cast_fp16_to_uint16)[name = string("gather_21_cast_uint16")]; string gather_21_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_21_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_18_values0_0 = const()[name = string("concat_18_values0_0"), val = int32(1)]; int32 concat_18_values1_0 = const()[name = string("concat_18_values1_0"), val = int32(1)]; int32 concat_18_values2_0 = const()[name = string("concat_18_values2_0"), val = int32(0)]; int32 concat_18_axis_0 = const()[name = string("concat_18_axis_0"), val = int32(0)]; bool concat_18_interleave_0 = const()[name = string("concat_18_interleave_0"), val = bool(false)]; int32 gather_21_cast_uint16_to_int32 = cast(dtype = gather_21_cast_uint16_to_int32_dtype_0, x = gather_21_cast_uint16)[name = string("cast_731")]; tensor concat_18 = concat(axis = concat_18_axis_0, interleave = concat_18_interleave_0, values = (concat_18_values0_0, concat_18_values1_0, concat_18_values2_0, gather_21_cast_uint16_to_int32))[name = string("concat_18")]; tensor attention_mask_1_begin_0 = const()[name = string("attention_mask_1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_1_end_mask_0 = const()[name = string("attention_mask_1_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_1_cast_fp16 = slice_by_index(begin = attention_mask_1_begin_0, end = concat_18, end_mask = attention_mask_1_end_mask_0, x = causal_mask)[name = string("attention_mask_1_cast_fp16")]; fp16 var_85_to_fp16 = const()[name = string("op_85_to_fp16"), val = fp16(0x1.6ap-4)]; tensor mul_0_cast_fp16 = mul(x = query_1_cast_fp16, y = var_85_to_fp16)[name = string("mul_0_cast_fp16")]; bool matmul_0_transpose_y_0 = const()[name = string("matmul_0_transpose_y_0"), val = bool(true)]; bool matmul_0_transpose_x_0 = const()[name = string("matmul_0_transpose_x_0"), val = bool(false)]; tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = mul_0_cast_fp16, y = key_1_cast_fp16)[name = string("matmul_0_cast_fp16")]; tensor add_22_cast_fp16 = add(x = matmul_0_cast_fp16, y = attention_mask_1_cast_fp16)[name = string("add_22_cast_fp16")]; int32 softmax_0_axis_0 = const()[name = string("softmax_0_axis_0"), val = int32(-1)]; tensor softmax_0_cast_fp16 = softmax(axis = softmax_0_axis_0, x = add_22_cast_fp16)[name = string("softmax_0_cast_fp16")]; bool attn_output_1_transpose_x_0 = const()[name = string("attn_output_1_transpose_x_0"), val = bool(false)]; bool attn_output_1_transpose_y_0 = const()[name = string("attn_output_1_transpose_y_0"), val = bool(false)]; tensor attn_output_1_cast_fp16 = matmul(transpose_x = attn_output_1_transpose_x_0, transpose_y = attn_output_1_transpose_y_0, x = softmax_0_cast_fp16, y = value_1_cast_fp16)[name = string("attn_output_1_cast_fp16")]; tensor var_381_perm_0 = const()[name = string("op_381_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_19_axis_0 = const()[name = string("concat_19_axis_0"), val = int32(0)]; bool concat_19_interleave_0 = const()[name = string("concat_19_interleave_0"), val = bool(false)]; int32 gather_5_cast_uint16_to_int32 = cast(dtype = gather_5_cast_uint16_to_int32_dtype_0, x = gather_5_cast_uint16)[name = string("cast_739")]; tensor concat_19 = concat(axis = concat_19_axis_0, interleave = concat_19_interleave_0, values = (gather_4, gather_5_cast_uint16_to_int32, var_72))[name = string("concat_19")]; tensor var_381_cast_fp16 = transpose(perm = var_381_perm_0, x = attn_output_1_cast_fp16)[name = string("transpose_108")]; tensor var_384_cast_fp16 = reshape(shape = concat_19, x = var_381_cast_fp16)[name = string("op_384_cast_fp16")]; tensor model_model_layers_0_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_0_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(319563072)))]; tensor linear_3_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_0_self_attn_o_proj_weight_to_fp16, x = var_384_cast_fp16)[name = string("linear_3_cast_fp16")]; tensor hidden_states_35_cast_fp16 = add(x = inputs_embeds_cast_fp16, y = linear_3_cast_fp16)[name = string("hidden_states_35_cast_fp16")]; fp16 var_78_promoted_3_to_fp16 = const()[name = string("op_78_promoted_3_to_fp16"), val = fp16(0x1p+1)]; tensor var_391_cast_fp16 = pow(x = hidden_states_35_cast_fp16, y = var_78_promoted_3_to_fp16)[name = string("op_391_cast_fp16")]; tensor variance_7_axes_0 = const()[name = string("variance_7_axes_0"), val = tensor([-1])]; bool variance_7_keep_dims_0 = const()[name = string("variance_7_keep_dims_0"), val = bool(true)]; tensor variance_7_cast_fp16 = reduce_mean(axes = variance_7_axes_0, keep_dims = variance_7_keep_dims_0, x = var_391_cast_fp16)[name = string("variance_7_cast_fp16")]; fp16 var_394_to_fp16 = const()[name = string("op_394_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_395_cast_fp16 = add(x = variance_7_cast_fp16, y = var_394_to_fp16)[name = string("op_395_cast_fp16")]; fp32 var_396_epsilon_0 = const()[name = string("op_396_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_396_cast_fp16 = rsqrt(epsilon = var_396_epsilon_0, x = var_395_cast_fp16)[name = string("op_396_cast_fp16")]; tensor hidden_states_39_cast_fp16 = mul(x = hidden_states_35_cast_fp16, y = var_396_cast_fp16)[name = string("hidden_states_39_cast_fp16")]; tensor model_model_layers_0_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_0_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323757440)))]; tensor input_3_cast_fp16 = mul(x = model_model_layers_0_post_attention_layernorm_weight_to_fp16, y = hidden_states_39_cast_fp16)[name = string("input_3_cast_fp16")]; tensor model_model_layers_0_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_0_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(323759552)))]; tensor linear_4_bias_0_to_fp16 = const()[name = string("linear_4_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330051072)))]; tensor linear_4_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_0_mlp_gate_proj_weight_to_fp16, x = input_3_cast_fp16)[name = string("linear_4_cast_fp16")]; tensor var_408_cast_fp16 = silu(x = linear_4_cast_fp16)[name = string("op_408_cast_fp16")]; tensor model_model_layers_0_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_0_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(330057280)))]; tensor linear_5_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_0_mlp_up_proj_weight_to_fp16, x = input_3_cast_fp16)[name = string("linear_5_cast_fp16")]; tensor input_7_cast_fp16 = mul(x = var_408_cast_fp16, y = linear_5_cast_fp16)[name = string("input_7_cast_fp16")]; tensor model_model_layers_0_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_0_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(336348800)))]; tensor linear_6_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_0_mlp_down_proj_weight_to_fp16, x = input_7_cast_fp16)[name = string("linear_6_cast_fp16")]; tensor hidden_states_45_cast_fp16 = add(x = hidden_states_35_cast_fp16, y = linear_6_cast_fp16)[name = string("hidden_states_45_cast_fp16")]; fp16 var_78_promoted_4_to_fp16 = const()[name = string("op_78_promoted_4_to_fp16"), val = fp16(0x1p+1)]; tensor var_421_cast_fp16 = pow(x = hidden_states_45_cast_fp16, y = var_78_promoted_4_to_fp16)[name = string("op_421_cast_fp16")]; tensor variance_9_axes_0 = const()[name = string("variance_9_axes_0"), val = tensor([-1])]; bool variance_9_keep_dims_0 = const()[name = string("variance_9_keep_dims_0"), val = bool(true)]; tensor variance_9_cast_fp16 = reduce_mean(axes = variance_9_axes_0, keep_dims = variance_9_keep_dims_0, x = var_421_cast_fp16)[name = string("variance_9_cast_fp16")]; fp16 var_424_to_fp16 = const()[name = string("op_424_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_425_cast_fp16 = add(x = variance_9_cast_fp16, y = var_424_to_fp16)[name = string("op_425_cast_fp16")]; fp32 var_426_epsilon_0 = const()[name = string("op_426_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_426_cast_fp16 = rsqrt(epsilon = var_426_epsilon_0, x = var_425_cast_fp16)[name = string("op_426_cast_fp16")]; tensor hidden_states_49_cast_fp16 = mul(x = hidden_states_45_cast_fp16, y = var_426_cast_fp16)[name = string("hidden_states_49_cast_fp16")]; tensor model_model_layers_1_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342640320)))]; tensor hidden_states_53_cast_fp16 = mul(x = model_model_layers_1_input_layernorm_weight_to_fp16, y = hidden_states_49_cast_fp16)[name = string("hidden_states_53_cast_fp16")]; tensor var_439_shape_cast_fp16 = shape(x = hidden_states_53_cast_fp16)[name = string("op_439_shape_cast_fp16")]; int32 gather_22 = const()[name = string("gather_22"), val = int32(1)]; int32 gather_23_axis_0 = const()[name = string("gather_23_axis_0"), val = int32(0)]; int32 gather_23_batch_dims_0 = const()[name = string("gather_23_batch_dims_0"), val = int32(0)]; bool gather_23_validate_indices_0 = const()[name = string("gather_23_validate_indices_0"), val = bool(false)]; string var_439_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_439_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_23_to_uint16 = const()[name = string("select_23_to_uint16"), val = uint16(1)]; tensor var_439_shape_cast_fp16_to_uint16 = cast(dtype = var_439_shape_cast_fp16_to_uint16_dtype_0, x = var_439_shape_cast_fp16)[name = string("cast_730")]; uint16 gather_23_cast_uint16 = gather(axis = gather_23_axis_0, batch_dims = gather_23_batch_dims_0, indices = select_23_to_uint16, validate_indices = gather_23_validate_indices_0, x = var_439_shape_cast_fp16_to_uint16)[name = string("gather_23_cast_uint16")]; string gather_23_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_23_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_1_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_1_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(342642432)))]; tensor linear_7_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_1_self_attn_q_proj_weight_to_fp16, x = hidden_states_53_cast_fp16)[name = string("linear_7_cast_fp16")]; tensor concat_20x = const()[name = string("concat_20x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_55_cast_fp16 = reshape(shape = concat_20x, x = linear_7_cast_fp16)[name = string("hidden_states_55_cast_fp16")]; fp16 var_78_promoted_5_to_fp16 = const()[name = string("op_78_promoted_5_to_fp16"), val = fp16(0x1p+1)]; tensor var_447_cast_fp16 = pow(x = hidden_states_55_cast_fp16, y = var_78_promoted_5_to_fp16)[name = string("op_447_cast_fp16")]; tensor variance_11_axes_0 = const()[name = string("variance_11_axes_0"), val = tensor([-1])]; bool variance_11_keep_dims_0 = const()[name = string("variance_11_keep_dims_0"), val = bool(true)]; tensor variance_11_cast_fp16 = reduce_mean(axes = variance_11_axes_0, keep_dims = variance_11_keep_dims_0, x = var_447_cast_fp16)[name = string("variance_11_cast_fp16")]; fp16 var_450_to_fp16 = const()[name = string("op_450_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_451_cast_fp16 = add(x = variance_11_cast_fp16, y = var_450_to_fp16)[name = string("op_451_cast_fp16")]; fp32 var_452_epsilon_0 = const()[name = string("op_452_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_452_cast_fp16 = rsqrt(epsilon = var_452_epsilon_0, x = var_451_cast_fp16)[name = string("op_452_cast_fp16")]; tensor hidden_states_59_cast_fp16 = mul(x = hidden_states_55_cast_fp16, y = var_452_cast_fp16)[name = string("hidden_states_59_cast_fp16")]; tensor model_model_layers_1_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_1_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346836800)))]; tensor var_455_cast_fp16 = mul(x = model_model_layers_1_self_attn_q_norm_weight_to_fp16, y = hidden_states_59_cast_fp16)[name = string("op_455_cast_fp16")]; tensor q_3_perm_0 = const()[name = string("q_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_1_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_1_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(346837120)))]; tensor linear_8_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_1_self_attn_k_proj_weight_to_fp16, x = hidden_states_53_cast_fp16)[name = string("linear_8_cast_fp16")]; tensor concat_21x = const()[name = string("concat_21x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_63_cast_fp16 = reshape(shape = concat_21x, x = linear_8_cast_fp16)[name = string("hidden_states_63_cast_fp16")]; fp16 var_78_promoted_6_to_fp16 = const()[name = string("op_78_promoted_6_to_fp16"), val = fp16(0x1p+1)]; tensor var_463_cast_fp16 = pow(x = hidden_states_63_cast_fp16, y = var_78_promoted_6_to_fp16)[name = string("op_463_cast_fp16")]; tensor variance_13_axes_0 = const()[name = string("variance_13_axes_0"), val = tensor([-1])]; bool variance_13_keep_dims_0 = const()[name = string("variance_13_keep_dims_0"), val = bool(true)]; tensor variance_13_cast_fp16 = reduce_mean(axes = variance_13_axes_0, keep_dims = variance_13_keep_dims_0, x = var_463_cast_fp16)[name = string("variance_13_cast_fp16")]; fp16 var_466_to_fp16 = const()[name = string("op_466_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_467_cast_fp16 = add(x = variance_13_cast_fp16, y = var_466_to_fp16)[name = string("op_467_cast_fp16")]; fp32 var_468_epsilon_0 = const()[name = string("op_468_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_468_cast_fp16 = rsqrt(epsilon = var_468_epsilon_0, x = var_467_cast_fp16)[name = string("op_468_cast_fp16")]; tensor hidden_states_67_cast_fp16 = mul(x = hidden_states_63_cast_fp16, y = var_468_cast_fp16)[name = string("hidden_states_67_cast_fp16")]; tensor model_model_layers_1_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_1_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348934336)))]; tensor var_471_cast_fp16 = mul(x = model_model_layers_1_self_attn_k_norm_weight_to_fp16, y = hidden_states_67_cast_fp16)[name = string("op_471_cast_fp16")]; tensor k_3_perm_0 = const()[name = string("k_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_1_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_1_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(348934656)))]; tensor linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_1_self_attn_v_proj_weight_to_fp16, x = hidden_states_53_cast_fp16)[name = string("linear_9_cast_fp16")]; tensor concat_22x = const()[name = string("concat_22x"), val = tensor([1, -1, 8, 128])]; tensor var_476_cast_fp16 = reshape(shape = concat_22x, x = linear_9_cast_fp16)[name = string("op_476_cast_fp16")]; tensor v_state_3_perm_0 = const()[name = string("v_state_3_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_3_cast_fp16 = transpose(perm = q_3_perm_0, x = var_455_cast_fp16)[name = string("transpose_107")]; tensor var_480_cast_fp16 = mul(x = q_3_cast_fp16, y = cos_5_cast_fp16)[name = string("op_480_cast_fp16")]; tensor x1_5_begin_0 = const()[name = string("x1_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_5_end_0 = const()[name = string("x1_5_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_5_end_mask_0 = const()[name = string("x1_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_5_cast_fp16 = slice_by_index(begin = x1_5_begin_0, end = x1_5_end_0, end_mask = x1_5_end_mask_0, x = q_3_cast_fp16)[name = string("x1_5_cast_fp16")]; tensor x2_5_begin_0 = const()[name = string("x2_5_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_5_end_0 = const()[name = string("x2_5_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_5_end_mask_0 = const()[name = string("x2_5_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_5_cast_fp16 = slice_by_index(begin = x2_5_begin_0, end = x2_5_end_0, end_mask = x2_5_end_mask_0, x = q_3_cast_fp16)[name = string("x2_5_cast_fp16")]; fp16 const_6_promoted_to_fp16 = const()[name = string("const_6_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_491_cast_fp16 = mul(x = x2_5_cast_fp16, y = const_6_promoted_to_fp16)[name = string("op_491_cast_fp16")]; bool var_493_interleave_0 = const()[name = string("op_493_interleave_0"), val = bool(false)]; tensor var_493_cast_fp16 = concat(axis = var_72, interleave = var_493_interleave_0, values = (var_491_cast_fp16, x1_5_cast_fp16))[name = string("op_493_cast_fp16")]; tensor var_494_cast_fp16 = mul(x = var_493_cast_fp16, y = sin_5_cast_fp16)[name = string("op_494_cast_fp16")]; tensor query_5_cast_fp16 = add(x = var_480_cast_fp16, y = var_494_cast_fp16)[name = string("query_5_cast_fp16")]; tensor k_3_cast_fp16 = transpose(perm = k_3_perm_0, x = var_471_cast_fp16)[name = string("transpose_106")]; tensor var_496_cast_fp16 = mul(x = k_3_cast_fp16, y = cos_5_cast_fp16)[name = string("op_496_cast_fp16")]; tensor x1_7_begin_0 = const()[name = string("x1_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_7_end_0 = const()[name = string("x1_7_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_7_end_mask_0 = const()[name = string("x1_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_7_cast_fp16 = slice_by_index(begin = x1_7_begin_0, end = x1_7_end_0, end_mask = x1_7_end_mask_0, x = k_3_cast_fp16)[name = string("x1_7_cast_fp16")]; tensor x2_7_begin_0 = const()[name = string("x2_7_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_7_end_0 = const()[name = string("x2_7_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_7_end_mask_0 = const()[name = string("x2_7_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_7_cast_fp16 = slice_by_index(begin = x2_7_begin_0, end = x2_7_end_0, end_mask = x2_7_end_mask_0, x = k_3_cast_fp16)[name = string("x2_7_cast_fp16")]; fp16 const_7_promoted_to_fp16 = const()[name = string("const_7_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_507_cast_fp16 = mul(x = x2_7_cast_fp16, y = const_7_promoted_to_fp16)[name = string("op_507_cast_fp16")]; bool var_509_interleave_0 = const()[name = string("op_509_interleave_0"), val = bool(false)]; tensor var_509_cast_fp16 = concat(axis = var_72, interleave = var_509_interleave_0, values = (var_507_cast_fp16, x1_7_cast_fp16))[name = string("op_509_cast_fp16")]; tensor var_510_cast_fp16 = mul(x = var_509_cast_fp16, y = sin_5_cast_fp16)[name = string("op_510_cast_fp16")]; tensor k_state_3_cast_fp16 = add(x = var_496_cast_fp16, y = var_510_cast_fp16)[name = string("k_state_3_cast_fp16")]; tensor expand_dims_12 = const()[name = string("expand_dims_12"), val = tensor([0])]; tensor expand_dims_13 = const()[name = string("expand_dims_13"), val = tensor([0])]; tensor expand_dims_15 = const()[name = string("expand_dims_15"), val = tensor([0])]; tensor concat_25_values0_0 = const()[name = string("concat_25_values0_0"), val = tensor([1])]; int32 concat_25_axis_0 = const()[name = string("concat_25_axis_0"), val = int32(0)]; bool concat_25_interleave_0 = const()[name = string("concat_25_interleave_0"), val = bool(false)]; tensor concat_25 = concat(axis = concat_25_axis_0, interleave = concat_25_interleave_0, values = (concat_25_values0_0, expand_dims_12, expand_dims_13, expand_dims_2, expand_dims_15))[name = string("concat_25")]; tensor key_cache_internal_tensor_assign_2_stride_0 = const()[name = string("key_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_25, begin_mask = key_cache_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_2_squeeze_mask_0, stride = key_cache_internal_tensor_assign_2_stride_0, update = k_state_3_cast_fp16, x = coreml_update_state_56)[name = string("key_cache_internal_tensor_assign_2_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_2_cast_fp16, input = key_cache)[name = string("coreml_update_state_58_write_state")]; tensor coreml_update_state_58 = read_state(input = key_cache)[name = string("coreml_update_state_58")]; tensor value_cache_internal_tensor_assign_2_stride_0 = const()[name = string("value_cache_internal_tensor_assign_2_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_2_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_2_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_2_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_2_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_3_cast_fp16 = transpose(perm = v_state_3_perm_0, x = var_476_cast_fp16)[name = string("transpose_105")]; tensor value_cache_internal_tensor_assign_2_cast_fp16 = slice_update(begin = concat_25, begin_mask = value_cache_internal_tensor_assign_2_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_2_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_2_squeeze_mask_0, stride = value_cache_internal_tensor_assign_2_stride_0, update = v_state_3_cast_fp16, x = coreml_update_state_57)[name = string("value_cache_internal_tensor_assign_2_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_2_cast_fp16, input = value_cache)[name = string("coreml_update_state_59_write_state")]; tensor coreml_update_state_59 = read_state(input = value_cache)[name = string("coreml_update_state_59")]; tensor var_533_begin_0 = const()[name = string("op_533_begin_0"), val = tensor([1, 0, 0, 0, 0])]; tensor var_533_end_0 = const()[name = string("op_533_end_0"), val = tensor([2, 1, 8, 2048, 128])]; tensor var_533_end_mask_0 = const()[name = string("op_533_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_533_squeeze_mask_0 = const()[name = string("op_533_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_533_cast_fp16 = slice_by_index(begin = var_533_begin_0, end = var_533_end_0, end_mask = var_533_end_mask_0, squeeze_mask = var_533_squeeze_mask_0, x = coreml_update_state_58)[name = string("op_533_cast_fp16")]; tensor var_536_begin_0 = const()[name = string("op_536_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_536_end_mask_0 = const()[name = string("op_536_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_536_cast_fp16 = slice_by_index(begin = var_536_begin_0, end = concat_12, end_mask = var_536_end_mask_0, x = var_533_cast_fp16)[name = string("op_536_cast_fp16")]; tensor var_538_begin_0 = const()[name = string("op_538_begin_0"), val = tensor([1, 0, 0, 0, 0])]; tensor var_538_end_0 = const()[name = string("op_538_end_0"), val = tensor([2, 1, 8, 2048, 128])]; tensor var_538_end_mask_0 = const()[name = string("op_538_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_538_squeeze_mask_0 = const()[name = string("op_538_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_538_cast_fp16 = slice_by_index(begin = var_538_begin_0, end = var_538_end_0, end_mask = var_538_end_mask_0, squeeze_mask = var_538_squeeze_mask_0, x = coreml_update_state_59)[name = string("op_538_cast_fp16")]; tensor var_541_begin_0 = const()[name = string("op_541_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_541_end_mask_0 = const()[name = string("op_541_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_541_cast_fp16 = slice_by_index(begin = var_541_begin_0, end = concat_12, end_mask = var_541_end_mask_0, x = var_538_cast_fp16)[name = string("op_541_cast_fp16")]; tensor var_543_shape_cast_fp16 = shape(x = var_536_cast_fp16)[name = string("op_543_shape_cast_fp16")]; int32 gather_31 = const()[name = string("gather_31"), val = int32(1)]; int32 gather_32 = const()[name = string("gather_32"), val = int32(8)]; int32 gather_33_axis_0 = const()[name = string("gather_33_axis_0"), val = int32(0)]; int32 gather_33_batch_dims_0 = const()[name = string("gather_33_batch_dims_0"), val = int32(0)]; bool gather_33_validate_indices_0 = const()[name = string("gather_33_validate_indices_0"), val = bool(false)]; string var_543_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_543_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_33_to_uint16 = const()[name = string("select_33_to_uint16"), val = uint16(2)]; tensor var_543_shape_cast_fp16_to_uint16 = cast(dtype = var_543_shape_cast_fp16_to_uint16_dtype_0, x = var_543_shape_cast_fp16)[name = string("cast_728")]; uint16 gather_33_cast_uint16 = gather(axis = gather_33_axis_0, batch_dims = gather_33_batch_dims_0, indices = select_33_to_uint16, validate_indices = gather_33_validate_indices_0, x = var_543_shape_cast_fp16_to_uint16)[name = string("gather_33_cast_uint16")]; string gather_33_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_33_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_34 = const()[name = string("gather_34"), val = int32(128)]; tensor var_550_axes_0 = const()[name = string("op_550_axes_0"), val = tensor([2])]; tensor var_550_cast_fp16 = expand_dims(axes = var_550_axes_0, x = var_536_cast_fp16)[name = string("op_550_cast_fp16")]; int32 concat_33_axis_0 = const()[name = string("concat_33_axis_0"), val = int32(0)]; bool concat_33_interleave_0 = const()[name = string("concat_33_interleave_0"), val = bool(false)]; int32 gather_33_cast_uint16_to_int32 = cast(dtype = gather_33_cast_uint16_to_int32_dtype_0, x = gather_33_cast_uint16)[name = string("cast_727")]; tensor concat_33 = concat(axis = concat_33_axis_0, interleave = concat_33_interleave_0, values = (gather_31, gather_32, var_78, gather_33_cast_uint16_to_int32, gather_34))[name = string("concat_33")]; tensor shape_37_cast_fp16 = shape(x = var_550_cast_fp16)[name = string("shape_37_cast_fp16")]; tensor real_div_2 = real_div(x = concat_33, y = shape_37_cast_fp16)[name = string("real_div_2")]; tensor hidden_states_73_cast_fp16 = tile(reps = real_div_2, x = var_550_cast_fp16)[name = string("hidden_states_73_cast_fp16")]; tensor concat_34x = const()[name = string("concat_34x"), val = tensor([1, 16, -1, 128])]; tensor key_5_cast_fp16 = reshape(shape = concat_34x, x = hidden_states_73_cast_fp16)[name = string("key_5_cast_fp16")]; tensor var_560_shape_cast_fp16 = shape(x = var_541_cast_fp16)[name = string("op_560_shape_cast_fp16")]; int32 gather_35 = const()[name = string("gather_35"), val = int32(1)]; int32 gather_36 = const()[name = string("gather_36"), val = int32(8)]; int32 gather_37_axis_0 = const()[name = string("gather_37_axis_0"), val = int32(0)]; int32 gather_37_batch_dims_0 = const()[name = string("gather_37_batch_dims_0"), val = int32(0)]; bool gather_37_validate_indices_0 = const()[name = string("gather_37_validate_indices_0"), val = bool(false)]; string var_560_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_560_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_37_to_uint16 = const()[name = string("select_37_to_uint16"), val = uint16(2)]; tensor var_560_shape_cast_fp16_to_uint16 = cast(dtype = var_560_shape_cast_fp16_to_uint16_dtype_0, x = var_560_shape_cast_fp16)[name = string("cast_726")]; uint16 gather_37_cast_uint16 = gather(axis = gather_37_axis_0, batch_dims = gather_37_batch_dims_0, indices = select_37_to_uint16, validate_indices = gather_37_validate_indices_0, x = var_560_shape_cast_fp16_to_uint16)[name = string("gather_37_cast_uint16")]; string gather_37_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_37_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_38 = const()[name = string("gather_38"), val = int32(128)]; tensor var_567_axes_0 = const()[name = string("op_567_axes_0"), val = tensor([2])]; tensor var_567_cast_fp16 = expand_dims(axes = var_567_axes_0, x = var_541_cast_fp16)[name = string("op_567_cast_fp16")]; int32 concat_35_axis_0 = const()[name = string("concat_35_axis_0"), val = int32(0)]; bool concat_35_interleave_0 = const()[name = string("concat_35_interleave_0"), val = bool(false)]; int32 gather_37_cast_uint16_to_int32 = cast(dtype = gather_37_cast_uint16_to_int32_dtype_0, x = gather_37_cast_uint16)[name = string("cast_725")]; tensor concat_35 = concat(axis = concat_35_axis_0, interleave = concat_35_interleave_0, values = (gather_35, gather_36, var_78, gather_37_cast_uint16_to_int32, gather_38))[name = string("concat_35")]; tensor shape_42_cast_fp16 = shape(x = var_567_cast_fp16)[name = string("shape_42_cast_fp16")]; tensor real_div_3 = real_div(x = concat_35, y = shape_42_cast_fp16)[name = string("real_div_3")]; tensor hidden_states_77_cast_fp16 = tile(reps = real_div_3, x = var_567_cast_fp16)[name = string("hidden_states_77_cast_fp16")]; tensor concat_36x = const()[name = string("concat_36x"), val = tensor([1, 16, -1, 128])]; tensor value_5_cast_fp16 = reshape(shape = concat_36x, x = hidden_states_77_cast_fp16)[name = string("value_5_cast_fp16")]; tensor var_577_shape_cast_fp16 = shape(x = key_5_cast_fp16)[name = string("op_577_shape_cast_fp16")]; int32 gather_39_axis_0 = const()[name = string("gather_39_axis_0"), val = int32(0)]; int32 gather_39_batch_dims_0 = const()[name = string("gather_39_batch_dims_0"), val = int32(0)]; bool gather_39_validate_indices_0 = const()[name = string("gather_39_validate_indices_0"), val = bool(false)]; string var_577_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_577_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_39_to_uint16 = const()[name = string("select_39_to_uint16"), val = uint16(2)]; tensor var_577_shape_cast_fp16_to_uint16 = cast(dtype = var_577_shape_cast_fp16_to_uint16_dtype_0, x = var_577_shape_cast_fp16)[name = string("cast_724")]; uint16 gather_39_cast_uint16 = gather(axis = gather_39_axis_0, batch_dims = gather_39_batch_dims_0, indices = select_39_to_uint16, validate_indices = gather_39_validate_indices_0, x = var_577_shape_cast_fp16_to_uint16)[name = string("gather_39_cast_uint16")]; string gather_39_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_39_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_37_values0_0 = const()[name = string("concat_37_values0_0"), val = int32(1)]; int32 concat_37_values1_0 = const()[name = string("concat_37_values1_0"), val = int32(1)]; int32 concat_37_values2_0 = const()[name = string("concat_37_values2_0"), val = int32(0)]; int32 concat_37_axis_0 = const()[name = string("concat_37_axis_0"), val = int32(0)]; bool concat_37_interleave_0 = const()[name = string("concat_37_interleave_0"), val = bool(false)]; int32 gather_39_cast_uint16_to_int32 = cast(dtype = gather_39_cast_uint16_to_int32_dtype_0, x = gather_39_cast_uint16)[name = string("cast_723")]; tensor concat_37 = concat(axis = concat_37_axis_0, interleave = concat_37_interleave_0, values = (concat_37_values0_0, concat_37_values1_0, concat_37_values2_0, gather_39_cast_uint16_to_int32))[name = string("concat_37")]; tensor attention_mask_3_begin_0 = const()[name = string("attention_mask_3_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_3_end_mask_0 = const()[name = string("attention_mask_3_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_3_cast_fp16 = slice_by_index(begin = attention_mask_3_begin_0, end = concat_37, end_mask = attention_mask_3_end_mask_0, x = causal_mask)[name = string("attention_mask_3_cast_fp16")]; tensor mul_1_cast_fp16 = mul(x = query_5_cast_fp16, y = var_85_to_fp16)[name = string("mul_1_cast_fp16")]; bool matmul_1_transpose_y_0 = const()[name = string("matmul_1_transpose_y_0"), val = bool(true)]; bool matmul_1_transpose_x_0 = const()[name = string("matmul_1_transpose_x_0"), val = bool(false)]; tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = mul_1_cast_fp16, y = key_5_cast_fp16)[name = string("matmul_1_cast_fp16")]; tensor add_41_cast_fp16 = add(x = matmul_1_cast_fp16, y = attention_mask_3_cast_fp16)[name = string("add_41_cast_fp16")]; int32 softmax_1_axis_0 = const()[name = string("softmax_1_axis_0"), val = int32(-1)]; tensor softmax_1_cast_fp16 = softmax(axis = softmax_1_axis_0, x = add_41_cast_fp16)[name = string("softmax_1_cast_fp16")]; bool attn_output_5_transpose_x_0 = const()[name = string("attn_output_5_transpose_x_0"), val = bool(false)]; bool attn_output_5_transpose_y_0 = const()[name = string("attn_output_5_transpose_y_0"), val = bool(false)]; tensor attn_output_5_cast_fp16 = matmul(transpose_x = attn_output_5_transpose_x_0, transpose_y = attn_output_5_transpose_y_0, x = softmax_1_cast_fp16, y = value_5_cast_fp16)[name = string("attn_output_5_cast_fp16")]; tensor var_586_perm_0 = const()[name = string("op_586_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_38_axis_0 = const()[name = string("concat_38_axis_0"), val = int32(0)]; bool concat_38_interleave_0 = const()[name = string("concat_38_interleave_0"), val = bool(false)]; int32 gather_23_cast_uint16_to_int32 = cast(dtype = gather_23_cast_uint16_to_int32_dtype_0, x = gather_23_cast_uint16)[name = string("cast_729")]; tensor concat_38 = concat(axis = concat_38_axis_0, interleave = concat_38_interleave_0, values = (gather_22, gather_23_cast_uint16_to_int32, var_72))[name = string("concat_38")]; tensor var_586_cast_fp16 = transpose(perm = var_586_perm_0, x = attn_output_5_cast_fp16)[name = string("transpose_104")]; tensor var_589_cast_fp16 = reshape(shape = concat_38, x = var_586_cast_fp16)[name = string("op_589_cast_fp16")]; tensor model_model_layers_1_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_1_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(351031872)))]; tensor linear_10_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_1_self_attn_o_proj_weight_to_fp16, x = var_589_cast_fp16)[name = string("linear_10_cast_fp16")]; tensor hidden_states_81_cast_fp16 = add(x = hidden_states_45_cast_fp16, y = linear_10_cast_fp16)[name = string("hidden_states_81_cast_fp16")]; fp16 var_78_promoted_7_to_fp16 = const()[name = string("op_78_promoted_7_to_fp16"), val = fp16(0x1p+1)]; tensor var_596_cast_fp16 = pow(x = hidden_states_81_cast_fp16, y = var_78_promoted_7_to_fp16)[name = string("op_596_cast_fp16")]; tensor variance_15_axes_0 = const()[name = string("variance_15_axes_0"), val = tensor([-1])]; bool variance_15_keep_dims_0 = const()[name = string("variance_15_keep_dims_0"), val = bool(true)]; tensor variance_15_cast_fp16 = reduce_mean(axes = variance_15_axes_0, keep_dims = variance_15_keep_dims_0, x = var_596_cast_fp16)[name = string("variance_15_cast_fp16")]; fp16 var_599_to_fp16 = const()[name = string("op_599_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_600_cast_fp16 = add(x = variance_15_cast_fp16, y = var_599_to_fp16)[name = string("op_600_cast_fp16")]; fp32 var_601_epsilon_0 = const()[name = string("op_601_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_601_cast_fp16 = rsqrt(epsilon = var_601_epsilon_0, x = var_600_cast_fp16)[name = string("op_601_cast_fp16")]; tensor hidden_states_85_cast_fp16 = mul(x = hidden_states_81_cast_fp16, y = var_601_cast_fp16)[name = string("hidden_states_85_cast_fp16")]; tensor model_model_layers_1_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_1_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355226240)))]; tensor input_11_cast_fp16 = mul(x = model_model_layers_1_post_attention_layernorm_weight_to_fp16, y = hidden_states_85_cast_fp16)[name = string("input_11_cast_fp16")]; tensor model_model_layers_1_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_1_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(355228352)))]; tensor linear_11_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_1_mlp_gate_proj_weight_to_fp16, x = input_11_cast_fp16)[name = string("linear_11_cast_fp16")]; tensor var_613_cast_fp16 = silu(x = linear_11_cast_fp16)[name = string("op_613_cast_fp16")]; tensor model_model_layers_1_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_1_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(361519872)))]; tensor linear_12_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_1_mlp_up_proj_weight_to_fp16, x = input_11_cast_fp16)[name = string("linear_12_cast_fp16")]; tensor input_15_cast_fp16 = mul(x = var_613_cast_fp16, y = linear_12_cast_fp16)[name = string("input_15_cast_fp16")]; tensor model_model_layers_1_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_1_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(367811392)))]; tensor linear_13_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_1_mlp_down_proj_weight_to_fp16, x = input_15_cast_fp16)[name = string("linear_13_cast_fp16")]; tensor hidden_states_91_cast_fp16 = add(x = hidden_states_81_cast_fp16, y = linear_13_cast_fp16)[name = string("hidden_states_91_cast_fp16")]; fp16 var_78_promoted_8_to_fp16 = const()[name = string("op_78_promoted_8_to_fp16"), val = fp16(0x1p+1)]; tensor var_626_cast_fp16 = pow(x = hidden_states_91_cast_fp16, y = var_78_promoted_8_to_fp16)[name = string("op_626_cast_fp16")]; tensor variance_17_axes_0 = const()[name = string("variance_17_axes_0"), val = tensor([-1])]; bool variance_17_keep_dims_0 = const()[name = string("variance_17_keep_dims_0"), val = bool(true)]; tensor variance_17_cast_fp16 = reduce_mean(axes = variance_17_axes_0, keep_dims = variance_17_keep_dims_0, x = var_626_cast_fp16)[name = string("variance_17_cast_fp16")]; fp16 var_629_to_fp16 = const()[name = string("op_629_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_630_cast_fp16 = add(x = variance_17_cast_fp16, y = var_629_to_fp16)[name = string("op_630_cast_fp16")]; fp32 var_631_epsilon_0 = const()[name = string("op_631_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_631_cast_fp16 = rsqrt(epsilon = var_631_epsilon_0, x = var_630_cast_fp16)[name = string("op_631_cast_fp16")]; tensor hidden_states_95_cast_fp16 = mul(x = hidden_states_91_cast_fp16, y = var_631_cast_fp16)[name = string("hidden_states_95_cast_fp16")]; tensor model_model_layers_2_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374102912)))]; tensor hidden_states_99_cast_fp16 = mul(x = model_model_layers_2_input_layernorm_weight_to_fp16, y = hidden_states_95_cast_fp16)[name = string("hidden_states_99_cast_fp16")]; tensor var_644_shape_cast_fp16 = shape(x = hidden_states_99_cast_fp16)[name = string("op_644_shape_cast_fp16")]; int32 gather_40 = const()[name = string("gather_40"), val = int32(1)]; int32 gather_41_axis_0 = const()[name = string("gather_41_axis_0"), val = int32(0)]; int32 gather_41_batch_dims_0 = const()[name = string("gather_41_batch_dims_0"), val = int32(0)]; bool gather_41_validate_indices_0 = const()[name = string("gather_41_validate_indices_0"), val = bool(false)]; string var_644_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_644_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_41_to_uint16 = const()[name = string("select_41_to_uint16"), val = uint16(1)]; tensor var_644_shape_cast_fp16_to_uint16 = cast(dtype = var_644_shape_cast_fp16_to_uint16_dtype_0, x = var_644_shape_cast_fp16)[name = string("cast_722")]; uint16 gather_41_cast_uint16 = gather(axis = gather_41_axis_0, batch_dims = gather_41_batch_dims_0, indices = select_41_to_uint16, validate_indices = gather_41_validate_indices_0, x = var_644_shape_cast_fp16_to_uint16)[name = string("gather_41_cast_uint16")]; string gather_41_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_41_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_2_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_2_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(374105024)))]; tensor linear_14_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_2_self_attn_q_proj_weight_to_fp16, x = hidden_states_99_cast_fp16)[name = string("linear_14_cast_fp16")]; tensor concat_39x = const()[name = string("concat_39x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_101_cast_fp16 = reshape(shape = concat_39x, x = linear_14_cast_fp16)[name = string("hidden_states_101_cast_fp16")]; fp16 var_78_promoted_9_to_fp16 = const()[name = string("op_78_promoted_9_to_fp16"), val = fp16(0x1p+1)]; tensor var_652_cast_fp16 = pow(x = hidden_states_101_cast_fp16, y = var_78_promoted_9_to_fp16)[name = string("op_652_cast_fp16")]; tensor variance_19_axes_0 = const()[name = string("variance_19_axes_0"), val = tensor([-1])]; bool variance_19_keep_dims_0 = const()[name = string("variance_19_keep_dims_0"), val = bool(true)]; tensor variance_19_cast_fp16 = reduce_mean(axes = variance_19_axes_0, keep_dims = variance_19_keep_dims_0, x = var_652_cast_fp16)[name = string("variance_19_cast_fp16")]; fp16 var_655_to_fp16 = const()[name = string("op_655_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_656_cast_fp16 = add(x = variance_19_cast_fp16, y = var_655_to_fp16)[name = string("op_656_cast_fp16")]; fp32 var_657_epsilon_0 = const()[name = string("op_657_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_657_cast_fp16 = rsqrt(epsilon = var_657_epsilon_0, x = var_656_cast_fp16)[name = string("op_657_cast_fp16")]; tensor hidden_states_105_cast_fp16 = mul(x = hidden_states_101_cast_fp16, y = var_657_cast_fp16)[name = string("hidden_states_105_cast_fp16")]; tensor model_model_layers_2_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_2_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(378299392)))]; tensor var_660_cast_fp16 = mul(x = model_model_layers_2_self_attn_q_norm_weight_to_fp16, y = hidden_states_105_cast_fp16)[name = string("op_660_cast_fp16")]; tensor q_5_perm_0 = const()[name = string("q_5_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_2_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_2_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(378299712)))]; tensor linear_15_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_2_self_attn_k_proj_weight_to_fp16, x = hidden_states_99_cast_fp16)[name = string("linear_15_cast_fp16")]; tensor concat_40x = const()[name = string("concat_40x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_109_cast_fp16 = reshape(shape = concat_40x, x = linear_15_cast_fp16)[name = string("hidden_states_109_cast_fp16")]; fp16 var_78_promoted_10_to_fp16 = const()[name = string("op_78_promoted_10_to_fp16"), val = fp16(0x1p+1)]; tensor var_668_cast_fp16 = pow(x = hidden_states_109_cast_fp16, y = var_78_promoted_10_to_fp16)[name = string("op_668_cast_fp16")]; tensor variance_21_axes_0 = const()[name = string("variance_21_axes_0"), val = tensor([-1])]; bool variance_21_keep_dims_0 = const()[name = string("variance_21_keep_dims_0"), val = bool(true)]; tensor variance_21_cast_fp16 = reduce_mean(axes = variance_21_axes_0, keep_dims = variance_21_keep_dims_0, x = var_668_cast_fp16)[name = string("variance_21_cast_fp16")]; fp16 var_671_to_fp16 = const()[name = string("op_671_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_672_cast_fp16 = add(x = variance_21_cast_fp16, y = var_671_to_fp16)[name = string("op_672_cast_fp16")]; fp32 var_673_epsilon_0 = const()[name = string("op_673_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_673_cast_fp16 = rsqrt(epsilon = var_673_epsilon_0, x = var_672_cast_fp16)[name = string("op_673_cast_fp16")]; tensor hidden_states_113_cast_fp16 = mul(x = hidden_states_109_cast_fp16, y = var_673_cast_fp16)[name = string("hidden_states_113_cast_fp16")]; tensor model_model_layers_2_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_2_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380396928)))]; tensor var_676_cast_fp16 = mul(x = model_model_layers_2_self_attn_k_norm_weight_to_fp16, y = hidden_states_113_cast_fp16)[name = string("op_676_cast_fp16")]; tensor k_5_perm_0 = const()[name = string("k_5_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_2_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_2_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(380397248)))]; tensor linear_16_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_2_self_attn_v_proj_weight_to_fp16, x = hidden_states_99_cast_fp16)[name = string("linear_16_cast_fp16")]; tensor concat_41x = const()[name = string("concat_41x"), val = tensor([1, -1, 8, 128])]; tensor var_681_cast_fp16 = reshape(shape = concat_41x, x = linear_16_cast_fp16)[name = string("op_681_cast_fp16")]; tensor v_state_5_perm_0 = const()[name = string("v_state_5_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_5_cast_fp16 = transpose(perm = q_5_perm_0, x = var_660_cast_fp16)[name = string("transpose_103")]; tensor var_685_cast_fp16 = mul(x = q_5_cast_fp16, y = cos_5_cast_fp16)[name = string("op_685_cast_fp16")]; tensor x1_9_begin_0 = const()[name = string("x1_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_9_end_0 = const()[name = string("x1_9_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_9_end_mask_0 = const()[name = string("x1_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_9_cast_fp16 = slice_by_index(begin = x1_9_begin_0, end = x1_9_end_0, end_mask = x1_9_end_mask_0, x = q_5_cast_fp16)[name = string("x1_9_cast_fp16")]; tensor x2_9_begin_0 = const()[name = string("x2_9_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_9_end_0 = const()[name = string("x2_9_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_9_end_mask_0 = const()[name = string("x2_9_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_9_cast_fp16 = slice_by_index(begin = x2_9_begin_0, end = x2_9_end_0, end_mask = x2_9_end_mask_0, x = q_5_cast_fp16)[name = string("x2_9_cast_fp16")]; fp16 const_8_promoted_to_fp16 = const()[name = string("const_8_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_696_cast_fp16 = mul(x = x2_9_cast_fp16, y = const_8_promoted_to_fp16)[name = string("op_696_cast_fp16")]; bool var_698_interleave_0 = const()[name = string("op_698_interleave_0"), val = bool(false)]; tensor var_698_cast_fp16 = concat(axis = var_72, interleave = var_698_interleave_0, values = (var_696_cast_fp16, x1_9_cast_fp16))[name = string("op_698_cast_fp16")]; tensor var_699_cast_fp16 = mul(x = var_698_cast_fp16, y = sin_5_cast_fp16)[name = string("op_699_cast_fp16")]; tensor query_9_cast_fp16 = add(x = var_685_cast_fp16, y = var_699_cast_fp16)[name = string("query_9_cast_fp16")]; tensor k_5_cast_fp16 = transpose(perm = k_5_perm_0, x = var_676_cast_fp16)[name = string("transpose_102")]; tensor var_701_cast_fp16 = mul(x = k_5_cast_fp16, y = cos_5_cast_fp16)[name = string("op_701_cast_fp16")]; tensor x1_11_begin_0 = const()[name = string("x1_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_11_end_0 = const()[name = string("x1_11_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_11_end_mask_0 = const()[name = string("x1_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_11_cast_fp16 = slice_by_index(begin = x1_11_begin_0, end = x1_11_end_0, end_mask = x1_11_end_mask_0, x = k_5_cast_fp16)[name = string("x1_11_cast_fp16")]; tensor x2_11_begin_0 = const()[name = string("x2_11_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_11_end_0 = const()[name = string("x2_11_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_11_end_mask_0 = const()[name = string("x2_11_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_11_cast_fp16 = slice_by_index(begin = x2_11_begin_0, end = x2_11_end_0, end_mask = x2_11_end_mask_0, x = k_5_cast_fp16)[name = string("x2_11_cast_fp16")]; fp16 const_9_promoted_to_fp16 = const()[name = string("const_9_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_712_cast_fp16 = mul(x = x2_11_cast_fp16, y = const_9_promoted_to_fp16)[name = string("op_712_cast_fp16")]; bool var_714_interleave_0 = const()[name = string("op_714_interleave_0"), val = bool(false)]; tensor var_714_cast_fp16 = concat(axis = var_72, interleave = var_714_interleave_0, values = (var_712_cast_fp16, x1_11_cast_fp16))[name = string("op_714_cast_fp16")]; tensor var_715_cast_fp16 = mul(x = var_714_cast_fp16, y = sin_5_cast_fp16)[name = string("op_715_cast_fp16")]; tensor k_state_5_cast_fp16 = add(x = var_701_cast_fp16, y = var_715_cast_fp16)[name = string("k_state_5_cast_fp16")]; tensor expand_dims_24 = const()[name = string("expand_dims_24"), val = tensor([0])]; tensor expand_dims_25 = const()[name = string("expand_dims_25"), val = tensor([0])]; tensor expand_dims_27 = const()[name = string("expand_dims_27"), val = tensor([0])]; tensor concat_44_values0_0 = const()[name = string("concat_44_values0_0"), val = tensor([2])]; int32 concat_44_axis_0 = const()[name = string("concat_44_axis_0"), val = int32(0)]; bool concat_44_interleave_0 = const()[name = string("concat_44_interleave_0"), val = bool(false)]; tensor concat_44 = concat(axis = concat_44_axis_0, interleave = concat_44_interleave_0, values = (concat_44_values0_0, expand_dims_24, expand_dims_25, expand_dims_2, expand_dims_27))[name = string("concat_44")]; tensor key_cache_internal_tensor_assign_3_stride_0 = const()[name = string("key_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_44, begin_mask = key_cache_internal_tensor_assign_3_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_3_squeeze_mask_0, stride = key_cache_internal_tensor_assign_3_stride_0, update = k_state_5_cast_fp16, x = coreml_update_state_58)[name = string("key_cache_internal_tensor_assign_3_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_3_cast_fp16, input = key_cache)[name = string("coreml_update_state_60_write_state")]; tensor coreml_update_state_60 = read_state(input = key_cache)[name = string("coreml_update_state_60")]; tensor value_cache_internal_tensor_assign_3_stride_0 = const()[name = string("value_cache_internal_tensor_assign_3_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_3_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_3_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_3_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_3_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_5_cast_fp16 = transpose(perm = v_state_5_perm_0, x = var_681_cast_fp16)[name = string("transpose_101")]; tensor value_cache_internal_tensor_assign_3_cast_fp16 = slice_update(begin = concat_44, begin_mask = value_cache_internal_tensor_assign_3_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_3_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_3_squeeze_mask_0, stride = value_cache_internal_tensor_assign_3_stride_0, update = v_state_5_cast_fp16, x = coreml_update_state_59)[name = string("value_cache_internal_tensor_assign_3_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_3_cast_fp16, input = value_cache)[name = string("coreml_update_state_61_write_state")]; tensor coreml_update_state_61 = read_state(input = value_cache)[name = string("coreml_update_state_61")]; tensor var_738_begin_0 = const()[name = string("op_738_begin_0"), val = tensor([2, 0, 0, 0, 0])]; tensor var_738_end_0 = const()[name = string("op_738_end_0"), val = tensor([3, 1, 8, 2048, 128])]; tensor var_738_end_mask_0 = const()[name = string("op_738_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_738_squeeze_mask_0 = const()[name = string("op_738_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_738_cast_fp16 = slice_by_index(begin = var_738_begin_0, end = var_738_end_0, end_mask = var_738_end_mask_0, squeeze_mask = var_738_squeeze_mask_0, x = coreml_update_state_60)[name = string("op_738_cast_fp16")]; tensor var_741_begin_0 = const()[name = string("op_741_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_741_end_mask_0 = const()[name = string("op_741_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_741_cast_fp16 = slice_by_index(begin = var_741_begin_0, end = concat_12, end_mask = var_741_end_mask_0, x = var_738_cast_fp16)[name = string("op_741_cast_fp16")]; tensor var_743_begin_0 = const()[name = string("op_743_begin_0"), val = tensor([2, 0, 0, 0, 0])]; tensor var_743_end_0 = const()[name = string("op_743_end_0"), val = tensor([3, 1, 8, 2048, 128])]; tensor var_743_end_mask_0 = const()[name = string("op_743_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_743_squeeze_mask_0 = const()[name = string("op_743_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_743_cast_fp16 = slice_by_index(begin = var_743_begin_0, end = var_743_end_0, end_mask = var_743_end_mask_0, squeeze_mask = var_743_squeeze_mask_0, x = coreml_update_state_61)[name = string("op_743_cast_fp16")]; tensor var_746_begin_0 = const()[name = string("op_746_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_746_end_mask_0 = const()[name = string("op_746_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_746_cast_fp16 = slice_by_index(begin = var_746_begin_0, end = concat_12, end_mask = var_746_end_mask_0, x = var_743_cast_fp16)[name = string("op_746_cast_fp16")]; tensor var_748_shape_cast_fp16 = shape(x = var_741_cast_fp16)[name = string("op_748_shape_cast_fp16")]; int32 gather_49 = const()[name = string("gather_49"), val = int32(1)]; int32 gather_50 = const()[name = string("gather_50"), val = int32(8)]; int32 gather_51_axis_0 = const()[name = string("gather_51_axis_0"), val = int32(0)]; int32 gather_51_batch_dims_0 = const()[name = string("gather_51_batch_dims_0"), val = int32(0)]; bool gather_51_validate_indices_0 = const()[name = string("gather_51_validate_indices_0"), val = bool(false)]; string var_748_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_748_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_51_to_uint16 = const()[name = string("select_51_to_uint16"), val = uint16(2)]; tensor var_748_shape_cast_fp16_to_uint16 = cast(dtype = var_748_shape_cast_fp16_to_uint16_dtype_0, x = var_748_shape_cast_fp16)[name = string("cast_720")]; uint16 gather_51_cast_uint16 = gather(axis = gather_51_axis_0, batch_dims = gather_51_batch_dims_0, indices = select_51_to_uint16, validate_indices = gather_51_validate_indices_0, x = var_748_shape_cast_fp16_to_uint16)[name = string("gather_51_cast_uint16")]; string gather_51_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_51_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_52 = const()[name = string("gather_52"), val = int32(128)]; tensor var_755_axes_0 = const()[name = string("op_755_axes_0"), val = tensor([2])]; tensor var_755_cast_fp16 = expand_dims(axes = var_755_axes_0, x = var_741_cast_fp16)[name = string("op_755_cast_fp16")]; int32 concat_52_axis_0 = const()[name = string("concat_52_axis_0"), val = int32(0)]; bool concat_52_interleave_0 = const()[name = string("concat_52_interleave_0"), val = bool(false)]; int32 gather_51_cast_uint16_to_int32 = cast(dtype = gather_51_cast_uint16_to_int32_dtype_0, x = gather_51_cast_uint16)[name = string("cast_719")]; tensor concat_52 = concat(axis = concat_52_axis_0, interleave = concat_52_interleave_0, values = (gather_49, gather_50, var_78, gather_51_cast_uint16_to_int32, gather_52))[name = string("concat_52")]; tensor shape_57_cast_fp16 = shape(x = var_755_cast_fp16)[name = string("shape_57_cast_fp16")]; tensor real_div_4 = real_div(x = concat_52, y = shape_57_cast_fp16)[name = string("real_div_4")]; tensor hidden_states_119_cast_fp16 = tile(reps = real_div_4, x = var_755_cast_fp16)[name = string("hidden_states_119_cast_fp16")]; tensor concat_53x = const()[name = string("concat_53x"), val = tensor([1, 16, -1, 128])]; tensor key_9_cast_fp16 = reshape(shape = concat_53x, x = hidden_states_119_cast_fp16)[name = string("key_9_cast_fp16")]; tensor var_765_shape_cast_fp16 = shape(x = var_746_cast_fp16)[name = string("op_765_shape_cast_fp16")]; int32 gather_53 = const()[name = string("gather_53"), val = int32(1)]; int32 gather_54 = const()[name = string("gather_54"), val = int32(8)]; int32 gather_55_axis_0 = const()[name = string("gather_55_axis_0"), val = int32(0)]; int32 gather_55_batch_dims_0 = const()[name = string("gather_55_batch_dims_0"), val = int32(0)]; bool gather_55_validate_indices_0 = const()[name = string("gather_55_validate_indices_0"), val = bool(false)]; string var_765_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_765_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_55_to_uint16 = const()[name = string("select_55_to_uint16"), val = uint16(2)]; tensor var_765_shape_cast_fp16_to_uint16 = cast(dtype = var_765_shape_cast_fp16_to_uint16_dtype_0, x = var_765_shape_cast_fp16)[name = string("cast_718")]; uint16 gather_55_cast_uint16 = gather(axis = gather_55_axis_0, batch_dims = gather_55_batch_dims_0, indices = select_55_to_uint16, validate_indices = gather_55_validate_indices_0, x = var_765_shape_cast_fp16_to_uint16)[name = string("gather_55_cast_uint16")]; string gather_55_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_55_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_56 = const()[name = string("gather_56"), val = int32(128)]; tensor var_772_axes_0 = const()[name = string("op_772_axes_0"), val = tensor([2])]; tensor var_772_cast_fp16 = expand_dims(axes = var_772_axes_0, x = var_746_cast_fp16)[name = string("op_772_cast_fp16")]; int32 concat_54_axis_0 = const()[name = string("concat_54_axis_0"), val = int32(0)]; bool concat_54_interleave_0 = const()[name = string("concat_54_interleave_0"), val = bool(false)]; int32 gather_55_cast_uint16_to_int32 = cast(dtype = gather_55_cast_uint16_to_int32_dtype_0, x = gather_55_cast_uint16)[name = string("cast_717")]; tensor concat_54 = concat(axis = concat_54_axis_0, interleave = concat_54_interleave_0, values = (gather_53, gather_54, var_78, gather_55_cast_uint16_to_int32, gather_56))[name = string("concat_54")]; tensor shape_62_cast_fp16 = shape(x = var_772_cast_fp16)[name = string("shape_62_cast_fp16")]; tensor real_div_5 = real_div(x = concat_54, y = shape_62_cast_fp16)[name = string("real_div_5")]; tensor hidden_states_123_cast_fp16 = tile(reps = real_div_5, x = var_772_cast_fp16)[name = string("hidden_states_123_cast_fp16")]; tensor concat_55x = const()[name = string("concat_55x"), val = tensor([1, 16, -1, 128])]; tensor value_9_cast_fp16 = reshape(shape = concat_55x, x = hidden_states_123_cast_fp16)[name = string("value_9_cast_fp16")]; tensor var_782_shape_cast_fp16 = shape(x = key_9_cast_fp16)[name = string("op_782_shape_cast_fp16")]; int32 gather_57_axis_0 = const()[name = string("gather_57_axis_0"), val = int32(0)]; int32 gather_57_batch_dims_0 = const()[name = string("gather_57_batch_dims_0"), val = int32(0)]; bool gather_57_validate_indices_0 = const()[name = string("gather_57_validate_indices_0"), val = bool(false)]; string var_782_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_782_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_57_to_uint16 = const()[name = string("select_57_to_uint16"), val = uint16(2)]; tensor var_782_shape_cast_fp16_to_uint16 = cast(dtype = var_782_shape_cast_fp16_to_uint16_dtype_0, x = var_782_shape_cast_fp16)[name = string("cast_716")]; uint16 gather_57_cast_uint16 = gather(axis = gather_57_axis_0, batch_dims = gather_57_batch_dims_0, indices = select_57_to_uint16, validate_indices = gather_57_validate_indices_0, x = var_782_shape_cast_fp16_to_uint16)[name = string("gather_57_cast_uint16")]; string gather_57_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_57_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_56_values0_0 = const()[name = string("concat_56_values0_0"), val = int32(1)]; int32 concat_56_values1_0 = const()[name = string("concat_56_values1_0"), val = int32(1)]; int32 concat_56_values2_0 = const()[name = string("concat_56_values2_0"), val = int32(0)]; int32 concat_56_axis_0 = const()[name = string("concat_56_axis_0"), val = int32(0)]; bool concat_56_interleave_0 = const()[name = string("concat_56_interleave_0"), val = bool(false)]; int32 gather_57_cast_uint16_to_int32 = cast(dtype = gather_57_cast_uint16_to_int32_dtype_0, x = gather_57_cast_uint16)[name = string("cast_715")]; tensor concat_56 = concat(axis = concat_56_axis_0, interleave = concat_56_interleave_0, values = (concat_56_values0_0, concat_56_values1_0, concat_56_values2_0, gather_57_cast_uint16_to_int32))[name = string("concat_56")]; tensor attention_mask_5_begin_0 = const()[name = string("attention_mask_5_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_5_end_mask_0 = const()[name = string("attention_mask_5_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_5_cast_fp16 = slice_by_index(begin = attention_mask_5_begin_0, end = concat_56, end_mask = attention_mask_5_end_mask_0, x = causal_mask)[name = string("attention_mask_5_cast_fp16")]; tensor mul_2_cast_fp16 = mul(x = query_9_cast_fp16, y = var_85_to_fp16)[name = string("mul_2_cast_fp16")]; bool matmul_2_transpose_y_0 = const()[name = string("matmul_2_transpose_y_0"), val = bool(true)]; bool matmul_2_transpose_x_0 = const()[name = string("matmul_2_transpose_x_0"), val = bool(false)]; tensor matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = mul_2_cast_fp16, y = key_9_cast_fp16)[name = string("matmul_2_cast_fp16")]; tensor add_60_cast_fp16 = add(x = matmul_2_cast_fp16, y = attention_mask_5_cast_fp16)[name = string("add_60_cast_fp16")]; int32 softmax_2_axis_0 = const()[name = string("softmax_2_axis_0"), val = int32(-1)]; tensor softmax_2_cast_fp16 = softmax(axis = softmax_2_axis_0, x = add_60_cast_fp16)[name = string("softmax_2_cast_fp16")]; bool attn_output_9_transpose_x_0 = const()[name = string("attn_output_9_transpose_x_0"), val = bool(false)]; bool attn_output_9_transpose_y_0 = const()[name = string("attn_output_9_transpose_y_0"), val = bool(false)]; tensor attn_output_9_cast_fp16 = matmul(transpose_x = attn_output_9_transpose_x_0, transpose_y = attn_output_9_transpose_y_0, x = softmax_2_cast_fp16, y = value_9_cast_fp16)[name = string("attn_output_9_cast_fp16")]; tensor var_791_perm_0 = const()[name = string("op_791_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_57_axis_0 = const()[name = string("concat_57_axis_0"), val = int32(0)]; bool concat_57_interleave_0 = const()[name = string("concat_57_interleave_0"), val = bool(false)]; int32 gather_41_cast_uint16_to_int32 = cast(dtype = gather_41_cast_uint16_to_int32_dtype_0, x = gather_41_cast_uint16)[name = string("cast_721")]; tensor concat_57 = concat(axis = concat_57_axis_0, interleave = concat_57_interleave_0, values = (gather_40, gather_41_cast_uint16_to_int32, var_72))[name = string("concat_57")]; tensor var_791_cast_fp16 = transpose(perm = var_791_perm_0, x = attn_output_9_cast_fp16)[name = string("transpose_100")]; tensor var_794_cast_fp16 = reshape(shape = concat_57, x = var_791_cast_fp16)[name = string("op_794_cast_fp16")]; tensor model_model_layers_2_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_2_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(382494464)))]; tensor linear_17_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_2_self_attn_o_proj_weight_to_fp16, x = var_794_cast_fp16)[name = string("linear_17_cast_fp16")]; tensor hidden_states_127_cast_fp16 = add(x = hidden_states_91_cast_fp16, y = linear_17_cast_fp16)[name = string("hidden_states_127_cast_fp16")]; fp16 var_78_promoted_11_to_fp16 = const()[name = string("op_78_promoted_11_to_fp16"), val = fp16(0x1p+1)]; tensor var_801_cast_fp16 = pow(x = hidden_states_127_cast_fp16, y = var_78_promoted_11_to_fp16)[name = string("op_801_cast_fp16")]; tensor variance_23_axes_0 = const()[name = string("variance_23_axes_0"), val = tensor([-1])]; bool variance_23_keep_dims_0 = const()[name = string("variance_23_keep_dims_0"), val = bool(true)]; tensor variance_23_cast_fp16 = reduce_mean(axes = variance_23_axes_0, keep_dims = variance_23_keep_dims_0, x = var_801_cast_fp16)[name = string("variance_23_cast_fp16")]; fp16 var_804_to_fp16 = const()[name = string("op_804_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_805_cast_fp16 = add(x = variance_23_cast_fp16, y = var_804_to_fp16)[name = string("op_805_cast_fp16")]; fp32 var_806_epsilon_0 = const()[name = string("op_806_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_806_cast_fp16 = rsqrt(epsilon = var_806_epsilon_0, x = var_805_cast_fp16)[name = string("op_806_cast_fp16")]; tensor hidden_states_131_cast_fp16 = mul(x = hidden_states_127_cast_fp16, y = var_806_cast_fp16)[name = string("hidden_states_131_cast_fp16")]; tensor model_model_layers_2_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_2_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386688832)))]; tensor input_19_cast_fp16 = mul(x = model_model_layers_2_post_attention_layernorm_weight_to_fp16, y = hidden_states_131_cast_fp16)[name = string("input_19_cast_fp16")]; tensor model_model_layers_2_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_2_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(386690944)))]; tensor linear_18_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_2_mlp_gate_proj_weight_to_fp16, x = input_19_cast_fp16)[name = string("linear_18_cast_fp16")]; tensor var_818_cast_fp16 = silu(x = linear_18_cast_fp16)[name = string("op_818_cast_fp16")]; tensor model_model_layers_2_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_2_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(392982464)))]; tensor linear_19_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_2_mlp_up_proj_weight_to_fp16, x = input_19_cast_fp16)[name = string("linear_19_cast_fp16")]; tensor input_23_cast_fp16 = mul(x = var_818_cast_fp16, y = linear_19_cast_fp16)[name = string("input_23_cast_fp16")]; tensor model_model_layers_2_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_2_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(399273984)))]; tensor linear_20_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_2_mlp_down_proj_weight_to_fp16, x = input_23_cast_fp16)[name = string("linear_20_cast_fp16")]; tensor hidden_states_137_cast_fp16 = add(x = hidden_states_127_cast_fp16, y = linear_20_cast_fp16)[name = string("hidden_states_137_cast_fp16")]; fp16 var_78_promoted_12_to_fp16 = const()[name = string("op_78_promoted_12_to_fp16"), val = fp16(0x1p+1)]; tensor var_831_cast_fp16 = pow(x = hidden_states_137_cast_fp16, y = var_78_promoted_12_to_fp16)[name = string("op_831_cast_fp16")]; tensor variance_25_axes_0 = const()[name = string("variance_25_axes_0"), val = tensor([-1])]; bool variance_25_keep_dims_0 = const()[name = string("variance_25_keep_dims_0"), val = bool(true)]; tensor variance_25_cast_fp16 = reduce_mean(axes = variance_25_axes_0, keep_dims = variance_25_keep_dims_0, x = var_831_cast_fp16)[name = string("variance_25_cast_fp16")]; fp16 var_834_to_fp16 = const()[name = string("op_834_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_835_cast_fp16 = add(x = variance_25_cast_fp16, y = var_834_to_fp16)[name = string("op_835_cast_fp16")]; fp32 var_836_epsilon_0 = const()[name = string("op_836_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_836_cast_fp16 = rsqrt(epsilon = var_836_epsilon_0, x = var_835_cast_fp16)[name = string("op_836_cast_fp16")]; tensor hidden_states_141_cast_fp16 = mul(x = hidden_states_137_cast_fp16, y = var_836_cast_fp16)[name = string("hidden_states_141_cast_fp16")]; tensor model_model_layers_3_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(405565504)))]; tensor hidden_states_145_cast_fp16 = mul(x = model_model_layers_3_input_layernorm_weight_to_fp16, y = hidden_states_141_cast_fp16)[name = string("hidden_states_145_cast_fp16")]; tensor var_849_shape_cast_fp16 = shape(x = hidden_states_145_cast_fp16)[name = string("op_849_shape_cast_fp16")]; int32 gather_58 = const()[name = string("gather_58"), val = int32(1)]; int32 gather_59_axis_0 = const()[name = string("gather_59_axis_0"), val = int32(0)]; int32 gather_59_batch_dims_0 = const()[name = string("gather_59_batch_dims_0"), val = int32(0)]; bool gather_59_validate_indices_0 = const()[name = string("gather_59_validate_indices_0"), val = bool(false)]; string var_849_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_849_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_59_to_uint16 = const()[name = string("select_59_to_uint16"), val = uint16(1)]; tensor var_849_shape_cast_fp16_to_uint16 = cast(dtype = var_849_shape_cast_fp16_to_uint16_dtype_0, x = var_849_shape_cast_fp16)[name = string("cast_714")]; uint16 gather_59_cast_uint16 = gather(axis = gather_59_axis_0, batch_dims = gather_59_batch_dims_0, indices = select_59_to_uint16, validate_indices = gather_59_validate_indices_0, x = var_849_shape_cast_fp16_to_uint16)[name = string("gather_59_cast_uint16")]; string gather_59_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_59_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_3_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_3_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(405567616)))]; tensor linear_21_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_3_self_attn_q_proj_weight_to_fp16, x = hidden_states_145_cast_fp16)[name = string("linear_21_cast_fp16")]; tensor concat_58x = const()[name = string("concat_58x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_147_cast_fp16 = reshape(shape = concat_58x, x = linear_21_cast_fp16)[name = string("hidden_states_147_cast_fp16")]; fp16 var_78_promoted_13_to_fp16 = const()[name = string("op_78_promoted_13_to_fp16"), val = fp16(0x1p+1)]; tensor var_857_cast_fp16 = pow(x = hidden_states_147_cast_fp16, y = var_78_promoted_13_to_fp16)[name = string("op_857_cast_fp16")]; tensor variance_27_axes_0 = const()[name = string("variance_27_axes_0"), val = tensor([-1])]; bool variance_27_keep_dims_0 = const()[name = string("variance_27_keep_dims_0"), val = bool(true)]; tensor variance_27_cast_fp16 = reduce_mean(axes = variance_27_axes_0, keep_dims = variance_27_keep_dims_0, x = var_857_cast_fp16)[name = string("variance_27_cast_fp16")]; fp16 var_860_to_fp16 = const()[name = string("op_860_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_861_cast_fp16 = add(x = variance_27_cast_fp16, y = var_860_to_fp16)[name = string("op_861_cast_fp16")]; fp32 var_862_epsilon_0 = const()[name = string("op_862_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_862_cast_fp16 = rsqrt(epsilon = var_862_epsilon_0, x = var_861_cast_fp16)[name = string("op_862_cast_fp16")]; tensor hidden_states_151_cast_fp16 = mul(x = hidden_states_147_cast_fp16, y = var_862_cast_fp16)[name = string("hidden_states_151_cast_fp16")]; tensor model_model_layers_3_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_3_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409761984)))]; tensor var_865_cast_fp16 = mul(x = model_model_layers_3_self_attn_q_norm_weight_to_fp16, y = hidden_states_151_cast_fp16)[name = string("op_865_cast_fp16")]; tensor q_7_perm_0 = const()[name = string("q_7_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_3_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_3_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(409762304)))]; tensor linear_22_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_3_self_attn_k_proj_weight_to_fp16, x = hidden_states_145_cast_fp16)[name = string("linear_22_cast_fp16")]; tensor concat_59x = const()[name = string("concat_59x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_155_cast_fp16 = reshape(shape = concat_59x, x = linear_22_cast_fp16)[name = string("hidden_states_155_cast_fp16")]; fp16 var_78_promoted_14_to_fp16 = const()[name = string("op_78_promoted_14_to_fp16"), val = fp16(0x1p+1)]; tensor var_873_cast_fp16 = pow(x = hidden_states_155_cast_fp16, y = var_78_promoted_14_to_fp16)[name = string("op_873_cast_fp16")]; tensor variance_29_axes_0 = const()[name = string("variance_29_axes_0"), val = tensor([-1])]; bool variance_29_keep_dims_0 = const()[name = string("variance_29_keep_dims_0"), val = bool(true)]; tensor variance_29_cast_fp16 = reduce_mean(axes = variance_29_axes_0, keep_dims = variance_29_keep_dims_0, x = var_873_cast_fp16)[name = string("variance_29_cast_fp16")]; fp16 var_876_to_fp16 = const()[name = string("op_876_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_877_cast_fp16 = add(x = variance_29_cast_fp16, y = var_876_to_fp16)[name = string("op_877_cast_fp16")]; fp32 var_878_epsilon_0 = const()[name = string("op_878_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_878_cast_fp16 = rsqrt(epsilon = var_878_epsilon_0, x = var_877_cast_fp16)[name = string("op_878_cast_fp16")]; tensor hidden_states_159_cast_fp16 = mul(x = hidden_states_155_cast_fp16, y = var_878_cast_fp16)[name = string("hidden_states_159_cast_fp16")]; tensor model_model_layers_3_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_3_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(411859520)))]; tensor var_881_cast_fp16 = mul(x = model_model_layers_3_self_attn_k_norm_weight_to_fp16, y = hidden_states_159_cast_fp16)[name = string("op_881_cast_fp16")]; tensor k_7_perm_0 = const()[name = string("k_7_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_3_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_3_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(411859840)))]; tensor linear_23_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_3_self_attn_v_proj_weight_to_fp16, x = hidden_states_145_cast_fp16)[name = string("linear_23_cast_fp16")]; tensor concat_60x = const()[name = string("concat_60x"), val = tensor([1, -1, 8, 128])]; tensor var_886_cast_fp16 = reshape(shape = concat_60x, x = linear_23_cast_fp16)[name = string("op_886_cast_fp16")]; tensor v_state_7_perm_0 = const()[name = string("v_state_7_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_7_cast_fp16 = transpose(perm = q_7_perm_0, x = var_865_cast_fp16)[name = string("transpose_99")]; tensor var_890_cast_fp16 = mul(x = q_7_cast_fp16, y = cos_5_cast_fp16)[name = string("op_890_cast_fp16")]; tensor x1_13_begin_0 = const()[name = string("x1_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_13_end_0 = const()[name = string("x1_13_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_13_end_mask_0 = const()[name = string("x1_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_13_cast_fp16 = slice_by_index(begin = x1_13_begin_0, end = x1_13_end_0, end_mask = x1_13_end_mask_0, x = q_7_cast_fp16)[name = string("x1_13_cast_fp16")]; tensor x2_13_begin_0 = const()[name = string("x2_13_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_13_end_0 = const()[name = string("x2_13_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_13_end_mask_0 = const()[name = string("x2_13_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_13_cast_fp16 = slice_by_index(begin = x2_13_begin_0, end = x2_13_end_0, end_mask = x2_13_end_mask_0, x = q_7_cast_fp16)[name = string("x2_13_cast_fp16")]; fp16 const_10_promoted_to_fp16 = const()[name = string("const_10_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_901_cast_fp16 = mul(x = x2_13_cast_fp16, y = const_10_promoted_to_fp16)[name = string("op_901_cast_fp16")]; bool var_903_interleave_0 = const()[name = string("op_903_interleave_0"), val = bool(false)]; tensor var_903_cast_fp16 = concat(axis = var_72, interleave = var_903_interleave_0, values = (var_901_cast_fp16, x1_13_cast_fp16))[name = string("op_903_cast_fp16")]; tensor var_904_cast_fp16 = mul(x = var_903_cast_fp16, y = sin_5_cast_fp16)[name = string("op_904_cast_fp16")]; tensor query_13_cast_fp16 = add(x = var_890_cast_fp16, y = var_904_cast_fp16)[name = string("query_13_cast_fp16")]; tensor k_7_cast_fp16 = transpose(perm = k_7_perm_0, x = var_881_cast_fp16)[name = string("transpose_98")]; tensor var_906_cast_fp16 = mul(x = k_7_cast_fp16, y = cos_5_cast_fp16)[name = string("op_906_cast_fp16")]; tensor x1_15_begin_0 = const()[name = string("x1_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_15_end_0 = const()[name = string("x1_15_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_15_end_mask_0 = const()[name = string("x1_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_15_cast_fp16 = slice_by_index(begin = x1_15_begin_0, end = x1_15_end_0, end_mask = x1_15_end_mask_0, x = k_7_cast_fp16)[name = string("x1_15_cast_fp16")]; tensor x2_15_begin_0 = const()[name = string("x2_15_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_15_end_0 = const()[name = string("x2_15_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_15_end_mask_0 = const()[name = string("x2_15_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_15_cast_fp16 = slice_by_index(begin = x2_15_begin_0, end = x2_15_end_0, end_mask = x2_15_end_mask_0, x = k_7_cast_fp16)[name = string("x2_15_cast_fp16")]; fp16 const_11_promoted_to_fp16 = const()[name = string("const_11_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_917_cast_fp16 = mul(x = x2_15_cast_fp16, y = const_11_promoted_to_fp16)[name = string("op_917_cast_fp16")]; bool var_919_interleave_0 = const()[name = string("op_919_interleave_0"), val = bool(false)]; tensor var_919_cast_fp16 = concat(axis = var_72, interleave = var_919_interleave_0, values = (var_917_cast_fp16, x1_15_cast_fp16))[name = string("op_919_cast_fp16")]; tensor var_920_cast_fp16 = mul(x = var_919_cast_fp16, y = sin_5_cast_fp16)[name = string("op_920_cast_fp16")]; tensor k_state_7_cast_fp16 = add(x = var_906_cast_fp16, y = var_920_cast_fp16)[name = string("k_state_7_cast_fp16")]; tensor expand_dims_36 = const()[name = string("expand_dims_36"), val = tensor([0])]; tensor expand_dims_37 = const()[name = string("expand_dims_37"), val = tensor([0])]; tensor expand_dims_39 = const()[name = string("expand_dims_39"), val = tensor([0])]; tensor concat_63_values0_0 = const()[name = string("concat_63_values0_0"), val = tensor([3])]; int32 concat_63_axis_0 = const()[name = string("concat_63_axis_0"), val = int32(0)]; bool concat_63_interleave_0 = const()[name = string("concat_63_interleave_0"), val = bool(false)]; tensor concat_63 = concat(axis = concat_63_axis_0, interleave = concat_63_interleave_0, values = (concat_63_values0_0, expand_dims_36, expand_dims_37, expand_dims_2, expand_dims_39))[name = string("concat_63")]; tensor key_cache_internal_tensor_assign_4_stride_0 = const()[name = string("key_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_63, begin_mask = key_cache_internal_tensor_assign_4_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_4_squeeze_mask_0, stride = key_cache_internal_tensor_assign_4_stride_0, update = k_state_7_cast_fp16, x = coreml_update_state_60)[name = string("key_cache_internal_tensor_assign_4_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_4_cast_fp16, input = key_cache)[name = string("coreml_update_state_62_write_state")]; tensor coreml_update_state_62 = read_state(input = key_cache)[name = string("coreml_update_state_62")]; tensor value_cache_internal_tensor_assign_4_stride_0 = const()[name = string("value_cache_internal_tensor_assign_4_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_4_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_4_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_4_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_4_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_7_cast_fp16 = transpose(perm = v_state_7_perm_0, x = var_886_cast_fp16)[name = string("transpose_97")]; tensor value_cache_internal_tensor_assign_4_cast_fp16 = slice_update(begin = concat_63, begin_mask = value_cache_internal_tensor_assign_4_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_4_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_4_squeeze_mask_0, stride = value_cache_internal_tensor_assign_4_stride_0, update = v_state_7_cast_fp16, x = coreml_update_state_61)[name = string("value_cache_internal_tensor_assign_4_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_4_cast_fp16, input = value_cache)[name = string("coreml_update_state_63_write_state")]; tensor coreml_update_state_63 = read_state(input = value_cache)[name = string("coreml_update_state_63")]; tensor var_943_begin_0 = const()[name = string("op_943_begin_0"), val = tensor([3, 0, 0, 0, 0])]; tensor var_943_end_0 = const()[name = string("op_943_end_0"), val = tensor([4, 1, 8, 2048, 128])]; tensor var_943_end_mask_0 = const()[name = string("op_943_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_943_squeeze_mask_0 = const()[name = string("op_943_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_943_cast_fp16 = slice_by_index(begin = var_943_begin_0, end = var_943_end_0, end_mask = var_943_end_mask_0, squeeze_mask = var_943_squeeze_mask_0, x = coreml_update_state_62)[name = string("op_943_cast_fp16")]; tensor var_946_begin_0 = const()[name = string("op_946_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_946_end_mask_0 = const()[name = string("op_946_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_946_cast_fp16 = slice_by_index(begin = var_946_begin_0, end = concat_12, end_mask = var_946_end_mask_0, x = var_943_cast_fp16)[name = string("op_946_cast_fp16")]; tensor var_948_begin_0 = const()[name = string("op_948_begin_0"), val = tensor([3, 0, 0, 0, 0])]; tensor var_948_end_0 = const()[name = string("op_948_end_0"), val = tensor([4, 1, 8, 2048, 128])]; tensor var_948_end_mask_0 = const()[name = string("op_948_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_948_squeeze_mask_0 = const()[name = string("op_948_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_948_cast_fp16 = slice_by_index(begin = var_948_begin_0, end = var_948_end_0, end_mask = var_948_end_mask_0, squeeze_mask = var_948_squeeze_mask_0, x = coreml_update_state_63)[name = string("op_948_cast_fp16")]; tensor var_951_begin_0 = const()[name = string("op_951_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_951_end_mask_0 = const()[name = string("op_951_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_951_cast_fp16 = slice_by_index(begin = var_951_begin_0, end = concat_12, end_mask = var_951_end_mask_0, x = var_948_cast_fp16)[name = string("op_951_cast_fp16")]; tensor var_953_shape_cast_fp16 = shape(x = var_946_cast_fp16)[name = string("op_953_shape_cast_fp16")]; int32 gather_67 = const()[name = string("gather_67"), val = int32(1)]; int32 gather_68 = const()[name = string("gather_68"), val = int32(8)]; int32 gather_69_axis_0 = const()[name = string("gather_69_axis_0"), val = int32(0)]; int32 gather_69_batch_dims_0 = const()[name = string("gather_69_batch_dims_0"), val = int32(0)]; bool gather_69_validate_indices_0 = const()[name = string("gather_69_validate_indices_0"), val = bool(false)]; string var_953_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_953_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_69_to_uint16 = const()[name = string("select_69_to_uint16"), val = uint16(2)]; tensor var_953_shape_cast_fp16_to_uint16 = cast(dtype = var_953_shape_cast_fp16_to_uint16_dtype_0, x = var_953_shape_cast_fp16)[name = string("cast_712")]; uint16 gather_69_cast_uint16 = gather(axis = gather_69_axis_0, batch_dims = gather_69_batch_dims_0, indices = select_69_to_uint16, validate_indices = gather_69_validate_indices_0, x = var_953_shape_cast_fp16_to_uint16)[name = string("gather_69_cast_uint16")]; string gather_69_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_69_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_70 = const()[name = string("gather_70"), val = int32(128)]; tensor var_960_axes_0 = const()[name = string("op_960_axes_0"), val = tensor([2])]; tensor var_960_cast_fp16 = expand_dims(axes = var_960_axes_0, x = var_946_cast_fp16)[name = string("op_960_cast_fp16")]; int32 concat_71_axis_0 = const()[name = string("concat_71_axis_0"), val = int32(0)]; bool concat_71_interleave_0 = const()[name = string("concat_71_interleave_0"), val = bool(false)]; int32 gather_69_cast_uint16_to_int32 = cast(dtype = gather_69_cast_uint16_to_int32_dtype_0, x = gather_69_cast_uint16)[name = string("cast_711")]; tensor concat_71 = concat(axis = concat_71_axis_0, interleave = concat_71_interleave_0, values = (gather_67, gather_68, var_78, gather_69_cast_uint16_to_int32, gather_70))[name = string("concat_71")]; tensor shape_77_cast_fp16 = shape(x = var_960_cast_fp16)[name = string("shape_77_cast_fp16")]; tensor real_div_6 = real_div(x = concat_71, y = shape_77_cast_fp16)[name = string("real_div_6")]; tensor hidden_states_165_cast_fp16 = tile(reps = real_div_6, x = var_960_cast_fp16)[name = string("hidden_states_165_cast_fp16")]; tensor concat_72x = const()[name = string("concat_72x"), val = tensor([1, 16, -1, 128])]; tensor key_13_cast_fp16 = reshape(shape = concat_72x, x = hidden_states_165_cast_fp16)[name = string("key_13_cast_fp16")]; tensor var_970_shape_cast_fp16 = shape(x = var_951_cast_fp16)[name = string("op_970_shape_cast_fp16")]; int32 gather_71 = const()[name = string("gather_71"), val = int32(1)]; int32 gather_72 = const()[name = string("gather_72"), val = int32(8)]; int32 gather_73_axis_0 = const()[name = string("gather_73_axis_0"), val = int32(0)]; int32 gather_73_batch_dims_0 = const()[name = string("gather_73_batch_dims_0"), val = int32(0)]; bool gather_73_validate_indices_0 = const()[name = string("gather_73_validate_indices_0"), val = bool(false)]; string var_970_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_970_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_73_to_uint16 = const()[name = string("select_73_to_uint16"), val = uint16(2)]; tensor var_970_shape_cast_fp16_to_uint16 = cast(dtype = var_970_shape_cast_fp16_to_uint16_dtype_0, x = var_970_shape_cast_fp16)[name = string("cast_710")]; uint16 gather_73_cast_uint16 = gather(axis = gather_73_axis_0, batch_dims = gather_73_batch_dims_0, indices = select_73_to_uint16, validate_indices = gather_73_validate_indices_0, x = var_970_shape_cast_fp16_to_uint16)[name = string("gather_73_cast_uint16")]; string gather_73_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_73_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_74 = const()[name = string("gather_74"), val = int32(128)]; tensor var_977_axes_0 = const()[name = string("op_977_axes_0"), val = tensor([2])]; tensor var_977_cast_fp16 = expand_dims(axes = var_977_axes_0, x = var_951_cast_fp16)[name = string("op_977_cast_fp16")]; int32 concat_73_axis_0 = const()[name = string("concat_73_axis_0"), val = int32(0)]; bool concat_73_interleave_0 = const()[name = string("concat_73_interleave_0"), val = bool(false)]; int32 gather_73_cast_uint16_to_int32 = cast(dtype = gather_73_cast_uint16_to_int32_dtype_0, x = gather_73_cast_uint16)[name = string("cast_709")]; tensor concat_73 = concat(axis = concat_73_axis_0, interleave = concat_73_interleave_0, values = (gather_71, gather_72, var_78, gather_73_cast_uint16_to_int32, gather_74))[name = string("concat_73")]; tensor shape_82_cast_fp16 = shape(x = var_977_cast_fp16)[name = string("shape_82_cast_fp16")]; tensor real_div_7 = real_div(x = concat_73, y = shape_82_cast_fp16)[name = string("real_div_7")]; tensor hidden_states_169_cast_fp16 = tile(reps = real_div_7, x = var_977_cast_fp16)[name = string("hidden_states_169_cast_fp16")]; tensor concat_74x = const()[name = string("concat_74x"), val = tensor([1, 16, -1, 128])]; tensor value_13_cast_fp16 = reshape(shape = concat_74x, x = hidden_states_169_cast_fp16)[name = string("value_13_cast_fp16")]; tensor var_987_shape_cast_fp16 = shape(x = key_13_cast_fp16)[name = string("op_987_shape_cast_fp16")]; int32 gather_75_axis_0 = const()[name = string("gather_75_axis_0"), val = int32(0)]; int32 gather_75_batch_dims_0 = const()[name = string("gather_75_batch_dims_0"), val = int32(0)]; bool gather_75_validate_indices_0 = const()[name = string("gather_75_validate_indices_0"), val = bool(false)]; string var_987_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_987_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_75_to_uint16 = const()[name = string("select_75_to_uint16"), val = uint16(2)]; tensor var_987_shape_cast_fp16_to_uint16 = cast(dtype = var_987_shape_cast_fp16_to_uint16_dtype_0, x = var_987_shape_cast_fp16)[name = string("cast_708")]; uint16 gather_75_cast_uint16 = gather(axis = gather_75_axis_0, batch_dims = gather_75_batch_dims_0, indices = select_75_to_uint16, validate_indices = gather_75_validate_indices_0, x = var_987_shape_cast_fp16_to_uint16)[name = string("gather_75_cast_uint16")]; string gather_75_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_75_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_75_values0_0 = const()[name = string("concat_75_values0_0"), val = int32(1)]; int32 concat_75_values1_0 = const()[name = string("concat_75_values1_0"), val = int32(1)]; int32 concat_75_values2_0 = const()[name = string("concat_75_values2_0"), val = int32(0)]; int32 concat_75_axis_0 = const()[name = string("concat_75_axis_0"), val = int32(0)]; bool concat_75_interleave_0 = const()[name = string("concat_75_interleave_0"), val = bool(false)]; int32 gather_75_cast_uint16_to_int32 = cast(dtype = gather_75_cast_uint16_to_int32_dtype_0, x = gather_75_cast_uint16)[name = string("cast_707")]; tensor concat_75 = concat(axis = concat_75_axis_0, interleave = concat_75_interleave_0, values = (concat_75_values0_0, concat_75_values1_0, concat_75_values2_0, gather_75_cast_uint16_to_int32))[name = string("concat_75")]; tensor attention_mask_7_begin_0 = const()[name = string("attention_mask_7_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_7_end_mask_0 = const()[name = string("attention_mask_7_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_7_cast_fp16 = slice_by_index(begin = attention_mask_7_begin_0, end = concat_75, end_mask = attention_mask_7_end_mask_0, x = causal_mask)[name = string("attention_mask_7_cast_fp16")]; tensor mul_3_cast_fp16 = mul(x = query_13_cast_fp16, y = var_85_to_fp16)[name = string("mul_3_cast_fp16")]; bool matmul_3_transpose_y_0 = const()[name = string("matmul_3_transpose_y_0"), val = bool(true)]; bool matmul_3_transpose_x_0 = const()[name = string("matmul_3_transpose_x_0"), val = bool(false)]; tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = mul_3_cast_fp16, y = key_13_cast_fp16)[name = string("matmul_3_cast_fp16")]; tensor add_79_cast_fp16 = add(x = matmul_3_cast_fp16, y = attention_mask_7_cast_fp16)[name = string("add_79_cast_fp16")]; int32 softmax_3_axis_0 = const()[name = string("softmax_3_axis_0"), val = int32(-1)]; tensor softmax_3_cast_fp16 = softmax(axis = softmax_3_axis_0, x = add_79_cast_fp16)[name = string("softmax_3_cast_fp16")]; bool attn_output_13_transpose_x_0 = const()[name = string("attn_output_13_transpose_x_0"), val = bool(false)]; bool attn_output_13_transpose_y_0 = const()[name = string("attn_output_13_transpose_y_0"), val = bool(false)]; tensor attn_output_13_cast_fp16 = matmul(transpose_x = attn_output_13_transpose_x_0, transpose_y = attn_output_13_transpose_y_0, x = softmax_3_cast_fp16, y = value_13_cast_fp16)[name = string("attn_output_13_cast_fp16")]; tensor var_996_perm_0 = const()[name = string("op_996_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_76_axis_0 = const()[name = string("concat_76_axis_0"), val = int32(0)]; bool concat_76_interleave_0 = const()[name = string("concat_76_interleave_0"), val = bool(false)]; int32 gather_59_cast_uint16_to_int32 = cast(dtype = gather_59_cast_uint16_to_int32_dtype_0, x = gather_59_cast_uint16)[name = string("cast_713")]; tensor concat_76 = concat(axis = concat_76_axis_0, interleave = concat_76_interleave_0, values = (gather_58, gather_59_cast_uint16_to_int32, var_72))[name = string("concat_76")]; tensor var_996_cast_fp16 = transpose(perm = var_996_perm_0, x = attn_output_13_cast_fp16)[name = string("transpose_96")]; tensor var_999_cast_fp16 = reshape(shape = concat_76, x = var_996_cast_fp16)[name = string("op_999_cast_fp16")]; tensor model_model_layers_3_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_3_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(413957056)))]; tensor linear_24_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_3_self_attn_o_proj_weight_to_fp16, x = var_999_cast_fp16)[name = string("linear_24_cast_fp16")]; tensor hidden_states_173_cast_fp16 = add(x = hidden_states_137_cast_fp16, y = linear_24_cast_fp16)[name = string("hidden_states_173_cast_fp16")]; fp16 var_78_promoted_15_to_fp16 = const()[name = string("op_78_promoted_15_to_fp16"), val = fp16(0x1p+1)]; tensor var_1006_cast_fp16 = pow(x = hidden_states_173_cast_fp16, y = var_78_promoted_15_to_fp16)[name = string("op_1006_cast_fp16")]; tensor variance_31_axes_0 = const()[name = string("variance_31_axes_0"), val = tensor([-1])]; bool variance_31_keep_dims_0 = const()[name = string("variance_31_keep_dims_0"), val = bool(true)]; tensor variance_31_cast_fp16 = reduce_mean(axes = variance_31_axes_0, keep_dims = variance_31_keep_dims_0, x = var_1006_cast_fp16)[name = string("variance_31_cast_fp16")]; fp16 var_1009_to_fp16 = const()[name = string("op_1009_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1010_cast_fp16 = add(x = variance_31_cast_fp16, y = var_1009_to_fp16)[name = string("op_1010_cast_fp16")]; fp32 var_1011_epsilon_0 = const()[name = string("op_1011_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1011_cast_fp16 = rsqrt(epsilon = var_1011_epsilon_0, x = var_1010_cast_fp16)[name = string("op_1011_cast_fp16")]; tensor hidden_states_177_cast_fp16 = mul(x = hidden_states_173_cast_fp16, y = var_1011_cast_fp16)[name = string("hidden_states_177_cast_fp16")]; tensor model_model_layers_3_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_3_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418151424)))]; tensor input_27_cast_fp16 = mul(x = model_model_layers_3_post_attention_layernorm_weight_to_fp16, y = hidden_states_177_cast_fp16)[name = string("input_27_cast_fp16")]; tensor model_model_layers_3_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_3_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(418153536)))]; tensor linear_25_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_3_mlp_gate_proj_weight_to_fp16, x = input_27_cast_fp16)[name = string("linear_25_cast_fp16")]; tensor var_1023_cast_fp16 = silu(x = linear_25_cast_fp16)[name = string("op_1023_cast_fp16")]; tensor model_model_layers_3_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_3_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(424445056)))]; tensor linear_26_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_3_mlp_up_proj_weight_to_fp16, x = input_27_cast_fp16)[name = string("linear_26_cast_fp16")]; tensor input_31_cast_fp16 = mul(x = var_1023_cast_fp16, y = linear_26_cast_fp16)[name = string("input_31_cast_fp16")]; tensor model_model_layers_3_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_3_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(430736576)))]; tensor linear_27_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_3_mlp_down_proj_weight_to_fp16, x = input_31_cast_fp16)[name = string("linear_27_cast_fp16")]; tensor hidden_states_183_cast_fp16 = add(x = hidden_states_173_cast_fp16, y = linear_27_cast_fp16)[name = string("hidden_states_183_cast_fp16")]; fp16 var_78_promoted_16_to_fp16 = const()[name = string("op_78_promoted_16_to_fp16"), val = fp16(0x1p+1)]; tensor var_1036_cast_fp16 = pow(x = hidden_states_183_cast_fp16, y = var_78_promoted_16_to_fp16)[name = string("op_1036_cast_fp16")]; tensor variance_33_axes_0 = const()[name = string("variance_33_axes_0"), val = tensor([-1])]; bool variance_33_keep_dims_0 = const()[name = string("variance_33_keep_dims_0"), val = bool(true)]; tensor variance_33_cast_fp16 = reduce_mean(axes = variance_33_axes_0, keep_dims = variance_33_keep_dims_0, x = var_1036_cast_fp16)[name = string("variance_33_cast_fp16")]; fp16 var_1039_to_fp16 = const()[name = string("op_1039_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1040_cast_fp16 = add(x = variance_33_cast_fp16, y = var_1039_to_fp16)[name = string("op_1040_cast_fp16")]; fp32 var_1041_epsilon_0 = const()[name = string("op_1041_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1041_cast_fp16 = rsqrt(epsilon = var_1041_epsilon_0, x = var_1040_cast_fp16)[name = string("op_1041_cast_fp16")]; tensor hidden_states_187_cast_fp16 = mul(x = hidden_states_183_cast_fp16, y = var_1041_cast_fp16)[name = string("hidden_states_187_cast_fp16")]; tensor model_model_layers_4_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437028096)))]; tensor hidden_states_191_cast_fp16 = mul(x = model_model_layers_4_input_layernorm_weight_to_fp16, y = hidden_states_187_cast_fp16)[name = string("hidden_states_191_cast_fp16")]; tensor var_1054_shape_cast_fp16 = shape(x = hidden_states_191_cast_fp16)[name = string("op_1054_shape_cast_fp16")]; int32 gather_76 = const()[name = string("gather_76"), val = int32(1)]; int32 gather_77_axis_0 = const()[name = string("gather_77_axis_0"), val = int32(0)]; int32 gather_77_batch_dims_0 = const()[name = string("gather_77_batch_dims_0"), val = int32(0)]; bool gather_77_validate_indices_0 = const()[name = string("gather_77_validate_indices_0"), val = bool(false)]; string var_1054_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1054_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_77_to_uint16 = const()[name = string("select_77_to_uint16"), val = uint16(1)]; tensor var_1054_shape_cast_fp16_to_uint16 = cast(dtype = var_1054_shape_cast_fp16_to_uint16_dtype_0, x = var_1054_shape_cast_fp16)[name = string("cast_706")]; uint16 gather_77_cast_uint16 = gather(axis = gather_77_axis_0, batch_dims = gather_77_batch_dims_0, indices = select_77_to_uint16, validate_indices = gather_77_validate_indices_0, x = var_1054_shape_cast_fp16_to_uint16)[name = string("gather_77_cast_uint16")]; string gather_77_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_77_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_4_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_4_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(437030208)))]; tensor linear_28_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_4_self_attn_q_proj_weight_to_fp16, x = hidden_states_191_cast_fp16)[name = string("linear_28_cast_fp16")]; tensor concat_77x = const()[name = string("concat_77x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_193_cast_fp16 = reshape(shape = concat_77x, x = linear_28_cast_fp16)[name = string("hidden_states_193_cast_fp16")]; fp16 var_78_promoted_17_to_fp16 = const()[name = string("op_78_promoted_17_to_fp16"), val = fp16(0x1p+1)]; tensor var_1062_cast_fp16 = pow(x = hidden_states_193_cast_fp16, y = var_78_promoted_17_to_fp16)[name = string("op_1062_cast_fp16")]; tensor variance_35_axes_0 = const()[name = string("variance_35_axes_0"), val = tensor([-1])]; bool variance_35_keep_dims_0 = const()[name = string("variance_35_keep_dims_0"), val = bool(true)]; tensor variance_35_cast_fp16 = reduce_mean(axes = variance_35_axes_0, keep_dims = variance_35_keep_dims_0, x = var_1062_cast_fp16)[name = string("variance_35_cast_fp16")]; fp16 var_1065_to_fp16 = const()[name = string("op_1065_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1066_cast_fp16 = add(x = variance_35_cast_fp16, y = var_1065_to_fp16)[name = string("op_1066_cast_fp16")]; fp32 var_1067_epsilon_0 = const()[name = string("op_1067_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1067_cast_fp16 = rsqrt(epsilon = var_1067_epsilon_0, x = var_1066_cast_fp16)[name = string("op_1067_cast_fp16")]; tensor hidden_states_197_cast_fp16 = mul(x = hidden_states_193_cast_fp16, y = var_1067_cast_fp16)[name = string("hidden_states_197_cast_fp16")]; tensor model_model_layers_4_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_4_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(441224576)))]; tensor var_1070_cast_fp16 = mul(x = model_model_layers_4_self_attn_q_norm_weight_to_fp16, y = hidden_states_197_cast_fp16)[name = string("op_1070_cast_fp16")]; tensor q_9_perm_0 = const()[name = string("q_9_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_4_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_4_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(441224896)))]; tensor linear_29_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_4_self_attn_k_proj_weight_to_fp16, x = hidden_states_191_cast_fp16)[name = string("linear_29_cast_fp16")]; tensor concat_78x = const()[name = string("concat_78x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_201_cast_fp16 = reshape(shape = concat_78x, x = linear_29_cast_fp16)[name = string("hidden_states_201_cast_fp16")]; fp16 var_78_promoted_18_to_fp16 = const()[name = string("op_78_promoted_18_to_fp16"), val = fp16(0x1p+1)]; tensor var_1078_cast_fp16 = pow(x = hidden_states_201_cast_fp16, y = var_78_promoted_18_to_fp16)[name = string("op_1078_cast_fp16")]; tensor variance_37_axes_0 = const()[name = string("variance_37_axes_0"), val = tensor([-1])]; bool variance_37_keep_dims_0 = const()[name = string("variance_37_keep_dims_0"), val = bool(true)]; tensor variance_37_cast_fp16 = reduce_mean(axes = variance_37_axes_0, keep_dims = variance_37_keep_dims_0, x = var_1078_cast_fp16)[name = string("variance_37_cast_fp16")]; fp16 var_1081_to_fp16 = const()[name = string("op_1081_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1082_cast_fp16 = add(x = variance_37_cast_fp16, y = var_1081_to_fp16)[name = string("op_1082_cast_fp16")]; fp32 var_1083_epsilon_0 = const()[name = string("op_1083_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1083_cast_fp16 = rsqrt(epsilon = var_1083_epsilon_0, x = var_1082_cast_fp16)[name = string("op_1083_cast_fp16")]; tensor hidden_states_205_cast_fp16 = mul(x = hidden_states_201_cast_fp16, y = var_1083_cast_fp16)[name = string("hidden_states_205_cast_fp16")]; tensor model_model_layers_4_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_4_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443322112)))]; tensor var_1086_cast_fp16 = mul(x = model_model_layers_4_self_attn_k_norm_weight_to_fp16, y = hidden_states_205_cast_fp16)[name = string("op_1086_cast_fp16")]; tensor k_9_perm_0 = const()[name = string("k_9_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_4_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_4_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(443322432)))]; tensor linear_30_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_4_self_attn_v_proj_weight_to_fp16, x = hidden_states_191_cast_fp16)[name = string("linear_30_cast_fp16")]; tensor concat_79x = const()[name = string("concat_79x"), val = tensor([1, -1, 8, 128])]; tensor var_1091_cast_fp16 = reshape(shape = concat_79x, x = linear_30_cast_fp16)[name = string("op_1091_cast_fp16")]; tensor v_state_9_perm_0 = const()[name = string("v_state_9_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_9_cast_fp16 = transpose(perm = q_9_perm_0, x = var_1070_cast_fp16)[name = string("transpose_95")]; tensor var_1095_cast_fp16 = mul(x = q_9_cast_fp16, y = cos_5_cast_fp16)[name = string("op_1095_cast_fp16")]; tensor x1_17_begin_0 = const()[name = string("x1_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_17_end_0 = const()[name = string("x1_17_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_17_end_mask_0 = const()[name = string("x1_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_17_cast_fp16 = slice_by_index(begin = x1_17_begin_0, end = x1_17_end_0, end_mask = x1_17_end_mask_0, x = q_9_cast_fp16)[name = string("x1_17_cast_fp16")]; tensor x2_17_begin_0 = const()[name = string("x2_17_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_17_end_0 = const()[name = string("x2_17_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_17_end_mask_0 = const()[name = string("x2_17_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_17_cast_fp16 = slice_by_index(begin = x2_17_begin_0, end = x2_17_end_0, end_mask = x2_17_end_mask_0, x = q_9_cast_fp16)[name = string("x2_17_cast_fp16")]; fp16 const_12_promoted_to_fp16 = const()[name = string("const_12_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1106_cast_fp16 = mul(x = x2_17_cast_fp16, y = const_12_promoted_to_fp16)[name = string("op_1106_cast_fp16")]; bool var_1108_interleave_0 = const()[name = string("op_1108_interleave_0"), val = bool(false)]; tensor var_1108_cast_fp16 = concat(axis = var_72, interleave = var_1108_interleave_0, values = (var_1106_cast_fp16, x1_17_cast_fp16))[name = string("op_1108_cast_fp16")]; tensor var_1109_cast_fp16 = mul(x = var_1108_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1109_cast_fp16")]; tensor query_17_cast_fp16 = add(x = var_1095_cast_fp16, y = var_1109_cast_fp16)[name = string("query_17_cast_fp16")]; tensor k_9_cast_fp16 = transpose(perm = k_9_perm_0, x = var_1086_cast_fp16)[name = string("transpose_94")]; tensor var_1111_cast_fp16 = mul(x = k_9_cast_fp16, y = cos_5_cast_fp16)[name = string("op_1111_cast_fp16")]; tensor x1_19_begin_0 = const()[name = string("x1_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_19_end_0 = const()[name = string("x1_19_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_19_end_mask_0 = const()[name = string("x1_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_19_cast_fp16 = slice_by_index(begin = x1_19_begin_0, end = x1_19_end_0, end_mask = x1_19_end_mask_0, x = k_9_cast_fp16)[name = string("x1_19_cast_fp16")]; tensor x2_19_begin_0 = const()[name = string("x2_19_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_19_end_0 = const()[name = string("x2_19_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_19_end_mask_0 = const()[name = string("x2_19_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_19_cast_fp16 = slice_by_index(begin = x2_19_begin_0, end = x2_19_end_0, end_mask = x2_19_end_mask_0, x = k_9_cast_fp16)[name = string("x2_19_cast_fp16")]; fp16 const_13_promoted_to_fp16 = const()[name = string("const_13_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1122_cast_fp16 = mul(x = x2_19_cast_fp16, y = const_13_promoted_to_fp16)[name = string("op_1122_cast_fp16")]; bool var_1124_interleave_0 = const()[name = string("op_1124_interleave_0"), val = bool(false)]; tensor var_1124_cast_fp16 = concat(axis = var_72, interleave = var_1124_interleave_0, values = (var_1122_cast_fp16, x1_19_cast_fp16))[name = string("op_1124_cast_fp16")]; tensor var_1125_cast_fp16 = mul(x = var_1124_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1125_cast_fp16")]; tensor k_state_9_cast_fp16 = add(x = var_1111_cast_fp16, y = var_1125_cast_fp16)[name = string("k_state_9_cast_fp16")]; tensor expand_dims_48 = const()[name = string("expand_dims_48"), val = tensor([0])]; tensor expand_dims_49 = const()[name = string("expand_dims_49"), val = tensor([0])]; tensor expand_dims_51 = const()[name = string("expand_dims_51"), val = tensor([0])]; tensor concat_82_values0_0 = const()[name = string("concat_82_values0_0"), val = tensor([4])]; int32 concat_82_axis_0 = const()[name = string("concat_82_axis_0"), val = int32(0)]; bool concat_82_interleave_0 = const()[name = string("concat_82_interleave_0"), val = bool(false)]; tensor concat_82 = concat(axis = concat_82_axis_0, interleave = concat_82_interleave_0, values = (concat_82_values0_0, expand_dims_48, expand_dims_49, expand_dims_2, expand_dims_51))[name = string("concat_82")]; tensor key_cache_internal_tensor_assign_5_stride_0 = const()[name = string("key_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_82, begin_mask = key_cache_internal_tensor_assign_5_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_5_squeeze_mask_0, stride = key_cache_internal_tensor_assign_5_stride_0, update = k_state_9_cast_fp16, x = coreml_update_state_62)[name = string("key_cache_internal_tensor_assign_5_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_5_cast_fp16, input = key_cache)[name = string("coreml_update_state_64_write_state")]; tensor coreml_update_state_64 = read_state(input = key_cache)[name = string("coreml_update_state_64")]; tensor value_cache_internal_tensor_assign_5_stride_0 = const()[name = string("value_cache_internal_tensor_assign_5_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_5_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_5_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_5_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_5_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_9_cast_fp16 = transpose(perm = v_state_9_perm_0, x = var_1091_cast_fp16)[name = string("transpose_93")]; tensor value_cache_internal_tensor_assign_5_cast_fp16 = slice_update(begin = concat_82, begin_mask = value_cache_internal_tensor_assign_5_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_5_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_5_squeeze_mask_0, stride = value_cache_internal_tensor_assign_5_stride_0, update = v_state_9_cast_fp16, x = coreml_update_state_63)[name = string("value_cache_internal_tensor_assign_5_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_5_cast_fp16, input = value_cache)[name = string("coreml_update_state_65_write_state")]; tensor coreml_update_state_65 = read_state(input = value_cache)[name = string("coreml_update_state_65")]; tensor var_1148_begin_0 = const()[name = string("op_1148_begin_0"), val = tensor([4, 0, 0, 0, 0])]; tensor var_1148_end_0 = const()[name = string("op_1148_end_0"), val = tensor([5, 1, 8, 2048, 128])]; tensor var_1148_end_mask_0 = const()[name = string("op_1148_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1148_squeeze_mask_0 = const()[name = string("op_1148_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = var_1148_end_0, end_mask = var_1148_end_mask_0, squeeze_mask = var_1148_squeeze_mask_0, x = coreml_update_state_64)[name = string("op_1148_cast_fp16")]; tensor var_1151_begin_0 = const()[name = string("op_1151_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1151_end_mask_0 = const()[name = string("op_1151_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1151_cast_fp16 = slice_by_index(begin = var_1151_begin_0, end = concat_12, end_mask = var_1151_end_mask_0, x = var_1148_cast_fp16)[name = string("op_1151_cast_fp16")]; tensor var_1153_begin_0 = const()[name = string("op_1153_begin_0"), val = tensor([4, 0, 0, 0, 0])]; tensor var_1153_end_0 = const()[name = string("op_1153_end_0"), val = tensor([5, 1, 8, 2048, 128])]; tensor var_1153_end_mask_0 = const()[name = string("op_1153_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1153_squeeze_mask_0 = const()[name = string("op_1153_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1153_cast_fp16 = slice_by_index(begin = var_1153_begin_0, end = var_1153_end_0, end_mask = var_1153_end_mask_0, squeeze_mask = var_1153_squeeze_mask_0, x = coreml_update_state_65)[name = string("op_1153_cast_fp16")]; tensor var_1156_begin_0 = const()[name = string("op_1156_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1156_end_mask_0 = const()[name = string("op_1156_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1156_cast_fp16 = slice_by_index(begin = var_1156_begin_0, end = concat_12, end_mask = var_1156_end_mask_0, x = var_1153_cast_fp16)[name = string("op_1156_cast_fp16")]; tensor var_1158_shape_cast_fp16 = shape(x = var_1151_cast_fp16)[name = string("op_1158_shape_cast_fp16")]; int32 gather_85 = const()[name = string("gather_85"), val = int32(1)]; int32 gather_86 = const()[name = string("gather_86"), val = int32(8)]; int32 gather_87_axis_0 = const()[name = string("gather_87_axis_0"), val = int32(0)]; int32 gather_87_batch_dims_0 = const()[name = string("gather_87_batch_dims_0"), val = int32(0)]; bool gather_87_validate_indices_0 = const()[name = string("gather_87_validate_indices_0"), val = bool(false)]; string var_1158_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1158_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_87_to_uint16 = const()[name = string("select_87_to_uint16"), val = uint16(2)]; tensor var_1158_shape_cast_fp16_to_uint16 = cast(dtype = var_1158_shape_cast_fp16_to_uint16_dtype_0, x = var_1158_shape_cast_fp16)[name = string("cast_704")]; uint16 gather_87_cast_uint16 = gather(axis = gather_87_axis_0, batch_dims = gather_87_batch_dims_0, indices = select_87_to_uint16, validate_indices = gather_87_validate_indices_0, x = var_1158_shape_cast_fp16_to_uint16)[name = string("gather_87_cast_uint16")]; string gather_87_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_87_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_88 = const()[name = string("gather_88"), val = int32(128)]; tensor var_1165_axes_0 = const()[name = string("op_1165_axes_0"), val = tensor([2])]; tensor var_1165_cast_fp16 = expand_dims(axes = var_1165_axes_0, x = var_1151_cast_fp16)[name = string("op_1165_cast_fp16")]; int32 concat_90_axis_0 = const()[name = string("concat_90_axis_0"), val = int32(0)]; bool concat_90_interleave_0 = const()[name = string("concat_90_interleave_0"), val = bool(false)]; int32 gather_87_cast_uint16_to_int32 = cast(dtype = gather_87_cast_uint16_to_int32_dtype_0, x = gather_87_cast_uint16)[name = string("cast_703")]; tensor concat_90 = concat(axis = concat_90_axis_0, interleave = concat_90_interleave_0, values = (gather_85, gather_86, var_78, gather_87_cast_uint16_to_int32, gather_88))[name = string("concat_90")]; tensor shape_97_cast_fp16 = shape(x = var_1165_cast_fp16)[name = string("shape_97_cast_fp16")]; tensor real_div_8 = real_div(x = concat_90, y = shape_97_cast_fp16)[name = string("real_div_8")]; tensor hidden_states_211_cast_fp16 = tile(reps = real_div_8, x = var_1165_cast_fp16)[name = string("hidden_states_211_cast_fp16")]; tensor concat_91x = const()[name = string("concat_91x"), val = tensor([1, 16, -1, 128])]; tensor key_17_cast_fp16 = reshape(shape = concat_91x, x = hidden_states_211_cast_fp16)[name = string("key_17_cast_fp16")]; tensor var_1175_shape_cast_fp16 = shape(x = var_1156_cast_fp16)[name = string("op_1175_shape_cast_fp16")]; int32 gather_89 = const()[name = string("gather_89"), val = int32(1)]; int32 gather_90 = const()[name = string("gather_90"), val = int32(8)]; int32 gather_91_axis_0 = const()[name = string("gather_91_axis_0"), val = int32(0)]; int32 gather_91_batch_dims_0 = const()[name = string("gather_91_batch_dims_0"), val = int32(0)]; bool gather_91_validate_indices_0 = const()[name = string("gather_91_validate_indices_0"), val = bool(false)]; string var_1175_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1175_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_91_to_uint16 = const()[name = string("select_91_to_uint16"), val = uint16(2)]; tensor var_1175_shape_cast_fp16_to_uint16 = cast(dtype = var_1175_shape_cast_fp16_to_uint16_dtype_0, x = var_1175_shape_cast_fp16)[name = string("cast_702")]; uint16 gather_91_cast_uint16 = gather(axis = gather_91_axis_0, batch_dims = gather_91_batch_dims_0, indices = select_91_to_uint16, validate_indices = gather_91_validate_indices_0, x = var_1175_shape_cast_fp16_to_uint16)[name = string("gather_91_cast_uint16")]; string gather_91_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_91_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_92 = const()[name = string("gather_92"), val = int32(128)]; tensor var_1182_axes_0 = const()[name = string("op_1182_axes_0"), val = tensor([2])]; tensor var_1182_cast_fp16 = expand_dims(axes = var_1182_axes_0, x = var_1156_cast_fp16)[name = string("op_1182_cast_fp16")]; int32 concat_92_axis_0 = const()[name = string("concat_92_axis_0"), val = int32(0)]; bool concat_92_interleave_0 = const()[name = string("concat_92_interleave_0"), val = bool(false)]; int32 gather_91_cast_uint16_to_int32 = cast(dtype = gather_91_cast_uint16_to_int32_dtype_0, x = gather_91_cast_uint16)[name = string("cast_701")]; tensor concat_92 = concat(axis = concat_92_axis_0, interleave = concat_92_interleave_0, values = (gather_89, gather_90, var_78, gather_91_cast_uint16_to_int32, gather_92))[name = string("concat_92")]; tensor shape_102_cast_fp16 = shape(x = var_1182_cast_fp16)[name = string("shape_102_cast_fp16")]; tensor real_div_9 = real_div(x = concat_92, y = shape_102_cast_fp16)[name = string("real_div_9")]; tensor hidden_states_215_cast_fp16 = tile(reps = real_div_9, x = var_1182_cast_fp16)[name = string("hidden_states_215_cast_fp16")]; tensor concat_93x = const()[name = string("concat_93x"), val = tensor([1, 16, -1, 128])]; tensor value_17_cast_fp16 = reshape(shape = concat_93x, x = hidden_states_215_cast_fp16)[name = string("value_17_cast_fp16")]; tensor var_1192_shape_cast_fp16 = shape(x = key_17_cast_fp16)[name = string("op_1192_shape_cast_fp16")]; int32 gather_93_axis_0 = const()[name = string("gather_93_axis_0"), val = int32(0)]; int32 gather_93_batch_dims_0 = const()[name = string("gather_93_batch_dims_0"), val = int32(0)]; bool gather_93_validate_indices_0 = const()[name = string("gather_93_validate_indices_0"), val = bool(false)]; string var_1192_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1192_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_93_to_uint16 = const()[name = string("select_93_to_uint16"), val = uint16(2)]; tensor var_1192_shape_cast_fp16_to_uint16 = cast(dtype = var_1192_shape_cast_fp16_to_uint16_dtype_0, x = var_1192_shape_cast_fp16)[name = string("cast_700")]; uint16 gather_93_cast_uint16 = gather(axis = gather_93_axis_0, batch_dims = gather_93_batch_dims_0, indices = select_93_to_uint16, validate_indices = gather_93_validate_indices_0, x = var_1192_shape_cast_fp16_to_uint16)[name = string("gather_93_cast_uint16")]; string gather_93_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_93_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_94_values0_0 = const()[name = string("concat_94_values0_0"), val = int32(1)]; int32 concat_94_values1_0 = const()[name = string("concat_94_values1_0"), val = int32(1)]; int32 concat_94_values2_0 = const()[name = string("concat_94_values2_0"), val = int32(0)]; int32 concat_94_axis_0 = const()[name = string("concat_94_axis_0"), val = int32(0)]; bool concat_94_interleave_0 = const()[name = string("concat_94_interleave_0"), val = bool(false)]; int32 gather_93_cast_uint16_to_int32 = cast(dtype = gather_93_cast_uint16_to_int32_dtype_0, x = gather_93_cast_uint16)[name = string("cast_699")]; tensor concat_94 = concat(axis = concat_94_axis_0, interleave = concat_94_interleave_0, values = (concat_94_values0_0, concat_94_values1_0, concat_94_values2_0, gather_93_cast_uint16_to_int32))[name = string("concat_94")]; tensor attention_mask_9_begin_0 = const()[name = string("attention_mask_9_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_9_end_mask_0 = const()[name = string("attention_mask_9_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_9_cast_fp16 = slice_by_index(begin = attention_mask_9_begin_0, end = concat_94, end_mask = attention_mask_9_end_mask_0, x = causal_mask)[name = string("attention_mask_9_cast_fp16")]; tensor mul_4_cast_fp16 = mul(x = query_17_cast_fp16, y = var_85_to_fp16)[name = string("mul_4_cast_fp16")]; bool matmul_4_transpose_y_0 = const()[name = string("matmul_4_transpose_y_0"), val = bool(true)]; bool matmul_4_transpose_x_0 = const()[name = string("matmul_4_transpose_x_0"), val = bool(false)]; tensor matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = mul_4_cast_fp16, y = key_17_cast_fp16)[name = string("matmul_4_cast_fp16")]; tensor add_98_cast_fp16 = add(x = matmul_4_cast_fp16, y = attention_mask_9_cast_fp16)[name = string("add_98_cast_fp16")]; int32 softmax_4_axis_0 = const()[name = string("softmax_4_axis_0"), val = int32(-1)]; tensor softmax_4_cast_fp16 = softmax(axis = softmax_4_axis_0, x = add_98_cast_fp16)[name = string("softmax_4_cast_fp16")]; bool attn_output_17_transpose_x_0 = const()[name = string("attn_output_17_transpose_x_0"), val = bool(false)]; bool attn_output_17_transpose_y_0 = const()[name = string("attn_output_17_transpose_y_0"), val = bool(false)]; tensor attn_output_17_cast_fp16 = matmul(transpose_x = attn_output_17_transpose_x_0, transpose_y = attn_output_17_transpose_y_0, x = softmax_4_cast_fp16, y = value_17_cast_fp16)[name = string("attn_output_17_cast_fp16")]; tensor var_1201_perm_0 = const()[name = string("op_1201_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_95_axis_0 = const()[name = string("concat_95_axis_0"), val = int32(0)]; bool concat_95_interleave_0 = const()[name = string("concat_95_interleave_0"), val = bool(false)]; int32 gather_77_cast_uint16_to_int32 = cast(dtype = gather_77_cast_uint16_to_int32_dtype_0, x = gather_77_cast_uint16)[name = string("cast_705")]; tensor concat_95 = concat(axis = concat_95_axis_0, interleave = concat_95_interleave_0, values = (gather_76, gather_77_cast_uint16_to_int32, var_72))[name = string("concat_95")]; tensor var_1201_cast_fp16 = transpose(perm = var_1201_perm_0, x = attn_output_17_cast_fp16)[name = string("transpose_92")]; tensor var_1204_cast_fp16 = reshape(shape = concat_95, x = var_1201_cast_fp16)[name = string("op_1204_cast_fp16")]; tensor model_model_layers_4_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_4_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(445419648)))]; tensor linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_4_self_attn_o_proj_weight_to_fp16, x = var_1204_cast_fp16)[name = string("linear_31_cast_fp16")]; tensor hidden_states_219_cast_fp16 = add(x = hidden_states_183_cast_fp16, y = linear_31_cast_fp16)[name = string("hidden_states_219_cast_fp16")]; fp16 var_78_promoted_19_to_fp16 = const()[name = string("op_78_promoted_19_to_fp16"), val = fp16(0x1p+1)]; tensor var_1211_cast_fp16 = pow(x = hidden_states_219_cast_fp16, y = var_78_promoted_19_to_fp16)[name = string("op_1211_cast_fp16")]; tensor variance_39_axes_0 = const()[name = string("variance_39_axes_0"), val = tensor([-1])]; bool variance_39_keep_dims_0 = const()[name = string("variance_39_keep_dims_0"), val = bool(true)]; tensor variance_39_cast_fp16 = reduce_mean(axes = variance_39_axes_0, keep_dims = variance_39_keep_dims_0, x = var_1211_cast_fp16)[name = string("variance_39_cast_fp16")]; fp16 var_1214_to_fp16 = const()[name = string("op_1214_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1215_cast_fp16 = add(x = variance_39_cast_fp16, y = var_1214_to_fp16)[name = string("op_1215_cast_fp16")]; fp32 var_1216_epsilon_0 = const()[name = string("op_1216_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1216_cast_fp16 = rsqrt(epsilon = var_1216_epsilon_0, x = var_1215_cast_fp16)[name = string("op_1216_cast_fp16")]; tensor hidden_states_223_cast_fp16 = mul(x = hidden_states_219_cast_fp16, y = var_1216_cast_fp16)[name = string("hidden_states_223_cast_fp16")]; tensor model_model_layers_4_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_4_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(449614016)))]; tensor input_35_cast_fp16 = mul(x = model_model_layers_4_post_attention_layernorm_weight_to_fp16, y = hidden_states_223_cast_fp16)[name = string("input_35_cast_fp16")]; tensor model_model_layers_4_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_4_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(449616128)))]; tensor linear_32_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_4_mlp_gate_proj_weight_to_fp16, x = input_35_cast_fp16)[name = string("linear_32_cast_fp16")]; tensor var_1228_cast_fp16 = silu(x = linear_32_cast_fp16)[name = string("op_1228_cast_fp16")]; tensor model_model_layers_4_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_4_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(455907648)))]; tensor linear_33_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_4_mlp_up_proj_weight_to_fp16, x = input_35_cast_fp16)[name = string("linear_33_cast_fp16")]; tensor input_39_cast_fp16 = mul(x = var_1228_cast_fp16, y = linear_33_cast_fp16)[name = string("input_39_cast_fp16")]; tensor model_model_layers_4_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_4_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(462199168)))]; tensor linear_34_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_4_mlp_down_proj_weight_to_fp16, x = input_39_cast_fp16)[name = string("linear_34_cast_fp16")]; tensor hidden_states_229_cast_fp16 = add(x = hidden_states_219_cast_fp16, y = linear_34_cast_fp16)[name = string("hidden_states_229_cast_fp16")]; fp16 var_78_promoted_20_to_fp16 = const()[name = string("op_78_promoted_20_to_fp16"), val = fp16(0x1p+1)]; tensor var_1241_cast_fp16 = pow(x = hidden_states_229_cast_fp16, y = var_78_promoted_20_to_fp16)[name = string("op_1241_cast_fp16")]; tensor variance_41_axes_0 = const()[name = string("variance_41_axes_0"), val = tensor([-1])]; bool variance_41_keep_dims_0 = const()[name = string("variance_41_keep_dims_0"), val = bool(true)]; tensor variance_41_cast_fp16 = reduce_mean(axes = variance_41_axes_0, keep_dims = variance_41_keep_dims_0, x = var_1241_cast_fp16)[name = string("variance_41_cast_fp16")]; fp16 var_1244_to_fp16 = const()[name = string("op_1244_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1245_cast_fp16 = add(x = variance_41_cast_fp16, y = var_1244_to_fp16)[name = string("op_1245_cast_fp16")]; fp32 var_1246_epsilon_0 = const()[name = string("op_1246_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1246_cast_fp16 = rsqrt(epsilon = var_1246_epsilon_0, x = var_1245_cast_fp16)[name = string("op_1246_cast_fp16")]; tensor hidden_states_233_cast_fp16 = mul(x = hidden_states_229_cast_fp16, y = var_1246_cast_fp16)[name = string("hidden_states_233_cast_fp16")]; tensor model_model_layers_5_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(468490688)))]; tensor hidden_states_237_cast_fp16 = mul(x = model_model_layers_5_input_layernorm_weight_to_fp16, y = hidden_states_233_cast_fp16)[name = string("hidden_states_237_cast_fp16")]; tensor var_1259_shape_cast_fp16 = shape(x = hidden_states_237_cast_fp16)[name = string("op_1259_shape_cast_fp16")]; int32 gather_94 = const()[name = string("gather_94"), val = int32(1)]; int32 gather_95_axis_0 = const()[name = string("gather_95_axis_0"), val = int32(0)]; int32 gather_95_batch_dims_0 = const()[name = string("gather_95_batch_dims_0"), val = int32(0)]; bool gather_95_validate_indices_0 = const()[name = string("gather_95_validate_indices_0"), val = bool(false)]; string var_1259_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1259_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_95_to_uint16 = const()[name = string("select_95_to_uint16"), val = uint16(1)]; tensor var_1259_shape_cast_fp16_to_uint16 = cast(dtype = var_1259_shape_cast_fp16_to_uint16_dtype_0, x = var_1259_shape_cast_fp16)[name = string("cast_698")]; uint16 gather_95_cast_uint16 = gather(axis = gather_95_axis_0, batch_dims = gather_95_batch_dims_0, indices = select_95_to_uint16, validate_indices = gather_95_validate_indices_0, x = var_1259_shape_cast_fp16_to_uint16)[name = string("gather_95_cast_uint16")]; string gather_95_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_95_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_5_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_5_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(468492800)))]; tensor linear_35_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_5_self_attn_q_proj_weight_to_fp16, x = hidden_states_237_cast_fp16)[name = string("linear_35_cast_fp16")]; tensor concat_96x = const()[name = string("concat_96x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_239_cast_fp16 = reshape(shape = concat_96x, x = linear_35_cast_fp16)[name = string("hidden_states_239_cast_fp16")]; fp16 var_78_promoted_21_to_fp16 = const()[name = string("op_78_promoted_21_to_fp16"), val = fp16(0x1p+1)]; tensor var_1267_cast_fp16 = pow(x = hidden_states_239_cast_fp16, y = var_78_promoted_21_to_fp16)[name = string("op_1267_cast_fp16")]; tensor variance_43_axes_0 = const()[name = string("variance_43_axes_0"), val = tensor([-1])]; bool variance_43_keep_dims_0 = const()[name = string("variance_43_keep_dims_0"), val = bool(true)]; tensor variance_43_cast_fp16 = reduce_mean(axes = variance_43_axes_0, keep_dims = variance_43_keep_dims_0, x = var_1267_cast_fp16)[name = string("variance_43_cast_fp16")]; fp16 var_1270_to_fp16 = const()[name = string("op_1270_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1271_cast_fp16 = add(x = variance_43_cast_fp16, y = var_1270_to_fp16)[name = string("op_1271_cast_fp16")]; fp32 var_1272_epsilon_0 = const()[name = string("op_1272_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1272_cast_fp16 = rsqrt(epsilon = var_1272_epsilon_0, x = var_1271_cast_fp16)[name = string("op_1272_cast_fp16")]; tensor hidden_states_243_cast_fp16 = mul(x = hidden_states_239_cast_fp16, y = var_1272_cast_fp16)[name = string("hidden_states_243_cast_fp16")]; tensor model_model_layers_5_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_5_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472687168)))]; tensor var_1275_cast_fp16 = mul(x = model_model_layers_5_self_attn_q_norm_weight_to_fp16, y = hidden_states_243_cast_fp16)[name = string("op_1275_cast_fp16")]; tensor q_11_perm_0 = const()[name = string("q_11_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_5_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_5_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(472687488)))]; tensor linear_36_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_5_self_attn_k_proj_weight_to_fp16, x = hidden_states_237_cast_fp16)[name = string("linear_36_cast_fp16")]; tensor concat_97x = const()[name = string("concat_97x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_247_cast_fp16 = reshape(shape = concat_97x, x = linear_36_cast_fp16)[name = string("hidden_states_247_cast_fp16")]; fp16 var_78_promoted_22_to_fp16 = const()[name = string("op_78_promoted_22_to_fp16"), val = fp16(0x1p+1)]; tensor var_1283_cast_fp16 = pow(x = hidden_states_247_cast_fp16, y = var_78_promoted_22_to_fp16)[name = string("op_1283_cast_fp16")]; tensor variance_45_axes_0 = const()[name = string("variance_45_axes_0"), val = tensor([-1])]; bool variance_45_keep_dims_0 = const()[name = string("variance_45_keep_dims_0"), val = bool(true)]; tensor variance_45_cast_fp16 = reduce_mean(axes = variance_45_axes_0, keep_dims = variance_45_keep_dims_0, x = var_1283_cast_fp16)[name = string("variance_45_cast_fp16")]; fp16 var_1286_to_fp16 = const()[name = string("op_1286_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1287_cast_fp16 = add(x = variance_45_cast_fp16, y = var_1286_to_fp16)[name = string("op_1287_cast_fp16")]; fp32 var_1288_epsilon_0 = const()[name = string("op_1288_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1288_cast_fp16 = rsqrt(epsilon = var_1288_epsilon_0, x = var_1287_cast_fp16)[name = string("op_1288_cast_fp16")]; tensor hidden_states_251_cast_fp16 = mul(x = hidden_states_247_cast_fp16, y = var_1288_cast_fp16)[name = string("hidden_states_251_cast_fp16")]; tensor model_model_layers_5_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_5_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(474784704)))]; tensor var_1291_cast_fp16 = mul(x = model_model_layers_5_self_attn_k_norm_weight_to_fp16, y = hidden_states_251_cast_fp16)[name = string("op_1291_cast_fp16")]; tensor k_11_perm_0 = const()[name = string("k_11_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_5_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_5_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(474785024)))]; tensor linear_37_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_5_self_attn_v_proj_weight_to_fp16, x = hidden_states_237_cast_fp16)[name = string("linear_37_cast_fp16")]; tensor concat_98x = const()[name = string("concat_98x"), val = tensor([1, -1, 8, 128])]; tensor var_1296_cast_fp16 = reshape(shape = concat_98x, x = linear_37_cast_fp16)[name = string("op_1296_cast_fp16")]; tensor v_state_11_perm_0 = const()[name = string("v_state_11_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_11_cast_fp16 = transpose(perm = q_11_perm_0, x = var_1275_cast_fp16)[name = string("transpose_91")]; tensor var_1300_cast_fp16 = mul(x = q_11_cast_fp16, y = cos_5_cast_fp16)[name = string("op_1300_cast_fp16")]; tensor x1_21_begin_0 = const()[name = string("x1_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_21_end_0 = const()[name = string("x1_21_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_21_end_mask_0 = const()[name = string("x1_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_21_cast_fp16 = slice_by_index(begin = x1_21_begin_0, end = x1_21_end_0, end_mask = x1_21_end_mask_0, x = q_11_cast_fp16)[name = string("x1_21_cast_fp16")]; tensor x2_21_begin_0 = const()[name = string("x2_21_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_21_end_0 = const()[name = string("x2_21_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_21_end_mask_0 = const()[name = string("x2_21_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_21_cast_fp16 = slice_by_index(begin = x2_21_begin_0, end = x2_21_end_0, end_mask = x2_21_end_mask_0, x = q_11_cast_fp16)[name = string("x2_21_cast_fp16")]; fp16 const_14_promoted_to_fp16 = const()[name = string("const_14_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1311_cast_fp16 = mul(x = x2_21_cast_fp16, y = const_14_promoted_to_fp16)[name = string("op_1311_cast_fp16")]; bool var_1313_interleave_0 = const()[name = string("op_1313_interleave_0"), val = bool(false)]; tensor var_1313_cast_fp16 = concat(axis = var_72, interleave = var_1313_interleave_0, values = (var_1311_cast_fp16, x1_21_cast_fp16))[name = string("op_1313_cast_fp16")]; tensor var_1314_cast_fp16 = mul(x = var_1313_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1314_cast_fp16")]; tensor query_21_cast_fp16 = add(x = var_1300_cast_fp16, y = var_1314_cast_fp16)[name = string("query_21_cast_fp16")]; tensor k_11_cast_fp16 = transpose(perm = k_11_perm_0, x = var_1291_cast_fp16)[name = string("transpose_90")]; tensor var_1316_cast_fp16 = mul(x = k_11_cast_fp16, y = cos_5_cast_fp16)[name = string("op_1316_cast_fp16")]; tensor x1_23_begin_0 = const()[name = string("x1_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_23_end_0 = const()[name = string("x1_23_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_23_end_mask_0 = const()[name = string("x1_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_23_cast_fp16 = slice_by_index(begin = x1_23_begin_0, end = x1_23_end_0, end_mask = x1_23_end_mask_0, x = k_11_cast_fp16)[name = string("x1_23_cast_fp16")]; tensor x2_23_begin_0 = const()[name = string("x2_23_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_23_end_0 = const()[name = string("x2_23_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_23_end_mask_0 = const()[name = string("x2_23_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_23_cast_fp16 = slice_by_index(begin = x2_23_begin_0, end = x2_23_end_0, end_mask = x2_23_end_mask_0, x = k_11_cast_fp16)[name = string("x2_23_cast_fp16")]; fp16 const_15_promoted_to_fp16 = const()[name = string("const_15_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1327_cast_fp16 = mul(x = x2_23_cast_fp16, y = const_15_promoted_to_fp16)[name = string("op_1327_cast_fp16")]; bool var_1329_interleave_0 = const()[name = string("op_1329_interleave_0"), val = bool(false)]; tensor var_1329_cast_fp16 = concat(axis = var_72, interleave = var_1329_interleave_0, values = (var_1327_cast_fp16, x1_23_cast_fp16))[name = string("op_1329_cast_fp16")]; tensor var_1330_cast_fp16 = mul(x = var_1329_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1330_cast_fp16")]; tensor k_state_11_cast_fp16 = add(x = var_1316_cast_fp16, y = var_1330_cast_fp16)[name = string("k_state_11_cast_fp16")]; tensor expand_dims_60 = const()[name = string("expand_dims_60"), val = tensor([0])]; tensor expand_dims_61 = const()[name = string("expand_dims_61"), val = tensor([0])]; tensor expand_dims_63 = const()[name = string("expand_dims_63"), val = tensor([0])]; tensor concat_101_values0_0 = const()[name = string("concat_101_values0_0"), val = tensor([5])]; int32 concat_101_axis_0 = const()[name = string("concat_101_axis_0"), val = int32(0)]; bool concat_101_interleave_0 = const()[name = string("concat_101_interleave_0"), val = bool(false)]; tensor concat_101 = concat(axis = concat_101_axis_0, interleave = concat_101_interleave_0, values = (concat_101_values0_0, expand_dims_60, expand_dims_61, expand_dims_2, expand_dims_63))[name = string("concat_101")]; tensor key_cache_internal_tensor_assign_6_stride_0 = const()[name = string("key_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_101, begin_mask = key_cache_internal_tensor_assign_6_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_6_squeeze_mask_0, stride = key_cache_internal_tensor_assign_6_stride_0, update = k_state_11_cast_fp16, x = coreml_update_state_64)[name = string("key_cache_internal_tensor_assign_6_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_6_cast_fp16, input = key_cache)[name = string("coreml_update_state_66_write_state")]; tensor coreml_update_state_66 = read_state(input = key_cache)[name = string("coreml_update_state_66")]; tensor value_cache_internal_tensor_assign_6_stride_0 = const()[name = string("value_cache_internal_tensor_assign_6_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_6_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_6_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_6_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_6_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_11_cast_fp16 = transpose(perm = v_state_11_perm_0, x = var_1296_cast_fp16)[name = string("transpose_89")]; tensor value_cache_internal_tensor_assign_6_cast_fp16 = slice_update(begin = concat_101, begin_mask = value_cache_internal_tensor_assign_6_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_6_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_6_squeeze_mask_0, stride = value_cache_internal_tensor_assign_6_stride_0, update = v_state_11_cast_fp16, x = coreml_update_state_65)[name = string("value_cache_internal_tensor_assign_6_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_6_cast_fp16, input = value_cache)[name = string("coreml_update_state_67_write_state")]; tensor coreml_update_state_67 = read_state(input = value_cache)[name = string("coreml_update_state_67")]; tensor var_1353_begin_0 = const()[name = string("op_1353_begin_0"), val = tensor([5, 0, 0, 0, 0])]; tensor var_1353_end_0 = const()[name = string("op_1353_end_0"), val = tensor([6, 1, 8, 2048, 128])]; tensor var_1353_end_mask_0 = const()[name = string("op_1353_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1353_squeeze_mask_0 = const()[name = string("op_1353_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1353_cast_fp16 = slice_by_index(begin = var_1353_begin_0, end = var_1353_end_0, end_mask = var_1353_end_mask_0, squeeze_mask = var_1353_squeeze_mask_0, x = coreml_update_state_66)[name = string("op_1353_cast_fp16")]; tensor var_1356_begin_0 = const()[name = string("op_1356_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1356_end_mask_0 = const()[name = string("op_1356_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1356_cast_fp16 = slice_by_index(begin = var_1356_begin_0, end = concat_12, end_mask = var_1356_end_mask_0, x = var_1353_cast_fp16)[name = string("op_1356_cast_fp16")]; tensor var_1358_begin_0 = const()[name = string("op_1358_begin_0"), val = tensor([5, 0, 0, 0, 0])]; tensor var_1358_end_0 = const()[name = string("op_1358_end_0"), val = tensor([6, 1, 8, 2048, 128])]; tensor var_1358_end_mask_0 = const()[name = string("op_1358_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1358_squeeze_mask_0 = const()[name = string("op_1358_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1358_cast_fp16 = slice_by_index(begin = var_1358_begin_0, end = var_1358_end_0, end_mask = var_1358_end_mask_0, squeeze_mask = var_1358_squeeze_mask_0, x = coreml_update_state_67)[name = string("op_1358_cast_fp16")]; tensor var_1361_begin_0 = const()[name = string("op_1361_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1361_end_mask_0 = const()[name = string("op_1361_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1361_cast_fp16 = slice_by_index(begin = var_1361_begin_0, end = concat_12, end_mask = var_1361_end_mask_0, x = var_1358_cast_fp16)[name = string("op_1361_cast_fp16")]; tensor var_1363_shape_cast_fp16 = shape(x = var_1356_cast_fp16)[name = string("op_1363_shape_cast_fp16")]; int32 gather_103 = const()[name = string("gather_103"), val = int32(1)]; int32 gather_104 = const()[name = string("gather_104"), val = int32(8)]; int32 gather_105_axis_0 = const()[name = string("gather_105_axis_0"), val = int32(0)]; int32 gather_105_batch_dims_0 = const()[name = string("gather_105_batch_dims_0"), val = int32(0)]; bool gather_105_validate_indices_0 = const()[name = string("gather_105_validate_indices_0"), val = bool(false)]; string var_1363_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1363_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_105_to_uint16 = const()[name = string("select_105_to_uint16"), val = uint16(2)]; tensor var_1363_shape_cast_fp16_to_uint16 = cast(dtype = var_1363_shape_cast_fp16_to_uint16_dtype_0, x = var_1363_shape_cast_fp16)[name = string("cast_696")]; uint16 gather_105_cast_uint16 = gather(axis = gather_105_axis_0, batch_dims = gather_105_batch_dims_0, indices = select_105_to_uint16, validate_indices = gather_105_validate_indices_0, x = var_1363_shape_cast_fp16_to_uint16)[name = string("gather_105_cast_uint16")]; string gather_105_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_105_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_106 = const()[name = string("gather_106"), val = int32(128)]; tensor var_1370_axes_0 = const()[name = string("op_1370_axes_0"), val = tensor([2])]; tensor var_1370_cast_fp16 = expand_dims(axes = var_1370_axes_0, x = var_1356_cast_fp16)[name = string("op_1370_cast_fp16")]; int32 concat_109_axis_0 = const()[name = string("concat_109_axis_0"), val = int32(0)]; bool concat_109_interleave_0 = const()[name = string("concat_109_interleave_0"), val = bool(false)]; int32 gather_105_cast_uint16_to_int32 = cast(dtype = gather_105_cast_uint16_to_int32_dtype_0, x = gather_105_cast_uint16)[name = string("cast_695")]; tensor concat_109 = concat(axis = concat_109_axis_0, interleave = concat_109_interleave_0, values = (gather_103, gather_104, var_78, gather_105_cast_uint16_to_int32, gather_106))[name = string("concat_109")]; tensor shape_117_cast_fp16 = shape(x = var_1370_cast_fp16)[name = string("shape_117_cast_fp16")]; tensor real_div_10 = real_div(x = concat_109, y = shape_117_cast_fp16)[name = string("real_div_10")]; tensor hidden_states_257_cast_fp16 = tile(reps = real_div_10, x = var_1370_cast_fp16)[name = string("hidden_states_257_cast_fp16")]; tensor concat_110x = const()[name = string("concat_110x"), val = tensor([1, 16, -1, 128])]; tensor key_21_cast_fp16 = reshape(shape = concat_110x, x = hidden_states_257_cast_fp16)[name = string("key_21_cast_fp16")]; tensor var_1380_shape_cast_fp16 = shape(x = var_1361_cast_fp16)[name = string("op_1380_shape_cast_fp16")]; int32 gather_107 = const()[name = string("gather_107"), val = int32(1)]; int32 gather_108 = const()[name = string("gather_108"), val = int32(8)]; int32 gather_109_axis_0 = const()[name = string("gather_109_axis_0"), val = int32(0)]; int32 gather_109_batch_dims_0 = const()[name = string("gather_109_batch_dims_0"), val = int32(0)]; bool gather_109_validate_indices_0 = const()[name = string("gather_109_validate_indices_0"), val = bool(false)]; string var_1380_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1380_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_109_to_uint16 = const()[name = string("select_109_to_uint16"), val = uint16(2)]; tensor var_1380_shape_cast_fp16_to_uint16 = cast(dtype = var_1380_shape_cast_fp16_to_uint16_dtype_0, x = var_1380_shape_cast_fp16)[name = string("cast_694")]; uint16 gather_109_cast_uint16 = gather(axis = gather_109_axis_0, batch_dims = gather_109_batch_dims_0, indices = select_109_to_uint16, validate_indices = gather_109_validate_indices_0, x = var_1380_shape_cast_fp16_to_uint16)[name = string("gather_109_cast_uint16")]; string gather_109_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_109_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_110 = const()[name = string("gather_110"), val = int32(128)]; tensor var_1387_axes_0 = const()[name = string("op_1387_axes_0"), val = tensor([2])]; tensor var_1387_cast_fp16 = expand_dims(axes = var_1387_axes_0, x = var_1361_cast_fp16)[name = string("op_1387_cast_fp16")]; int32 concat_111_axis_0 = const()[name = string("concat_111_axis_0"), val = int32(0)]; bool concat_111_interleave_0 = const()[name = string("concat_111_interleave_0"), val = bool(false)]; int32 gather_109_cast_uint16_to_int32 = cast(dtype = gather_109_cast_uint16_to_int32_dtype_0, x = gather_109_cast_uint16)[name = string("cast_693")]; tensor concat_111 = concat(axis = concat_111_axis_0, interleave = concat_111_interleave_0, values = (gather_107, gather_108, var_78, gather_109_cast_uint16_to_int32, gather_110))[name = string("concat_111")]; tensor shape_122_cast_fp16 = shape(x = var_1387_cast_fp16)[name = string("shape_122_cast_fp16")]; tensor real_div_11 = real_div(x = concat_111, y = shape_122_cast_fp16)[name = string("real_div_11")]; tensor hidden_states_261_cast_fp16 = tile(reps = real_div_11, x = var_1387_cast_fp16)[name = string("hidden_states_261_cast_fp16")]; tensor concat_112x = const()[name = string("concat_112x"), val = tensor([1, 16, -1, 128])]; tensor value_21_cast_fp16 = reshape(shape = concat_112x, x = hidden_states_261_cast_fp16)[name = string("value_21_cast_fp16")]; tensor var_1397_shape_cast_fp16 = shape(x = key_21_cast_fp16)[name = string("op_1397_shape_cast_fp16")]; int32 gather_111_axis_0 = const()[name = string("gather_111_axis_0"), val = int32(0)]; int32 gather_111_batch_dims_0 = const()[name = string("gather_111_batch_dims_0"), val = int32(0)]; bool gather_111_validate_indices_0 = const()[name = string("gather_111_validate_indices_0"), val = bool(false)]; string var_1397_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1397_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_111_to_uint16 = const()[name = string("select_111_to_uint16"), val = uint16(2)]; tensor var_1397_shape_cast_fp16_to_uint16 = cast(dtype = var_1397_shape_cast_fp16_to_uint16_dtype_0, x = var_1397_shape_cast_fp16)[name = string("cast_692")]; uint16 gather_111_cast_uint16 = gather(axis = gather_111_axis_0, batch_dims = gather_111_batch_dims_0, indices = select_111_to_uint16, validate_indices = gather_111_validate_indices_0, x = var_1397_shape_cast_fp16_to_uint16)[name = string("gather_111_cast_uint16")]; string gather_111_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_111_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_113_values0_0 = const()[name = string("concat_113_values0_0"), val = int32(1)]; int32 concat_113_values1_0 = const()[name = string("concat_113_values1_0"), val = int32(1)]; int32 concat_113_values2_0 = const()[name = string("concat_113_values2_0"), val = int32(0)]; int32 concat_113_axis_0 = const()[name = string("concat_113_axis_0"), val = int32(0)]; bool concat_113_interleave_0 = const()[name = string("concat_113_interleave_0"), val = bool(false)]; int32 gather_111_cast_uint16_to_int32 = cast(dtype = gather_111_cast_uint16_to_int32_dtype_0, x = gather_111_cast_uint16)[name = string("cast_691")]; tensor concat_113 = concat(axis = concat_113_axis_0, interleave = concat_113_interleave_0, values = (concat_113_values0_0, concat_113_values1_0, concat_113_values2_0, gather_111_cast_uint16_to_int32))[name = string("concat_113")]; tensor attention_mask_11_begin_0 = const()[name = string("attention_mask_11_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_11_end_mask_0 = const()[name = string("attention_mask_11_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_11_cast_fp16 = slice_by_index(begin = attention_mask_11_begin_0, end = concat_113, end_mask = attention_mask_11_end_mask_0, x = causal_mask)[name = string("attention_mask_11_cast_fp16")]; tensor mul_5_cast_fp16 = mul(x = query_21_cast_fp16, y = var_85_to_fp16)[name = string("mul_5_cast_fp16")]; bool matmul_5_transpose_y_0 = const()[name = string("matmul_5_transpose_y_0"), val = bool(true)]; bool matmul_5_transpose_x_0 = const()[name = string("matmul_5_transpose_x_0"), val = bool(false)]; tensor matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = mul_5_cast_fp16, y = key_21_cast_fp16)[name = string("matmul_5_cast_fp16")]; tensor add_117_cast_fp16 = add(x = matmul_5_cast_fp16, y = attention_mask_11_cast_fp16)[name = string("add_117_cast_fp16")]; int32 softmax_5_axis_0 = const()[name = string("softmax_5_axis_0"), val = int32(-1)]; tensor softmax_5_cast_fp16 = softmax(axis = softmax_5_axis_0, x = add_117_cast_fp16)[name = string("softmax_5_cast_fp16")]; bool attn_output_21_transpose_x_0 = const()[name = string("attn_output_21_transpose_x_0"), val = bool(false)]; bool attn_output_21_transpose_y_0 = const()[name = string("attn_output_21_transpose_y_0"), val = bool(false)]; tensor attn_output_21_cast_fp16 = matmul(transpose_x = attn_output_21_transpose_x_0, transpose_y = attn_output_21_transpose_y_0, x = softmax_5_cast_fp16, y = value_21_cast_fp16)[name = string("attn_output_21_cast_fp16")]; tensor var_1406_perm_0 = const()[name = string("op_1406_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_114_axis_0 = const()[name = string("concat_114_axis_0"), val = int32(0)]; bool concat_114_interleave_0 = const()[name = string("concat_114_interleave_0"), val = bool(false)]; int32 gather_95_cast_uint16_to_int32 = cast(dtype = gather_95_cast_uint16_to_int32_dtype_0, x = gather_95_cast_uint16)[name = string("cast_697")]; tensor concat_114 = concat(axis = concat_114_axis_0, interleave = concat_114_interleave_0, values = (gather_94, gather_95_cast_uint16_to_int32, var_72))[name = string("concat_114")]; tensor var_1406_cast_fp16 = transpose(perm = var_1406_perm_0, x = attn_output_21_cast_fp16)[name = string("transpose_88")]; tensor var_1409_cast_fp16 = reshape(shape = concat_114, x = var_1406_cast_fp16)[name = string("op_1409_cast_fp16")]; tensor model_model_layers_5_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_5_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(476882240)))]; tensor linear_38_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_5_self_attn_o_proj_weight_to_fp16, x = var_1409_cast_fp16)[name = string("linear_38_cast_fp16")]; tensor hidden_states_265_cast_fp16 = add(x = hidden_states_229_cast_fp16, y = linear_38_cast_fp16)[name = string("hidden_states_265_cast_fp16")]; fp16 var_78_promoted_23_to_fp16 = const()[name = string("op_78_promoted_23_to_fp16"), val = fp16(0x1p+1)]; tensor var_1416_cast_fp16 = pow(x = hidden_states_265_cast_fp16, y = var_78_promoted_23_to_fp16)[name = string("op_1416_cast_fp16")]; tensor variance_47_axes_0 = const()[name = string("variance_47_axes_0"), val = tensor([-1])]; bool variance_47_keep_dims_0 = const()[name = string("variance_47_keep_dims_0"), val = bool(true)]; tensor variance_47_cast_fp16 = reduce_mean(axes = variance_47_axes_0, keep_dims = variance_47_keep_dims_0, x = var_1416_cast_fp16)[name = string("variance_47_cast_fp16")]; fp16 var_1419_to_fp16 = const()[name = string("op_1419_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1420_cast_fp16 = add(x = variance_47_cast_fp16, y = var_1419_to_fp16)[name = string("op_1420_cast_fp16")]; fp32 var_1421_epsilon_0 = const()[name = string("op_1421_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1421_cast_fp16 = rsqrt(epsilon = var_1421_epsilon_0, x = var_1420_cast_fp16)[name = string("op_1421_cast_fp16")]; tensor hidden_states_269_cast_fp16 = mul(x = hidden_states_265_cast_fp16, y = var_1421_cast_fp16)[name = string("hidden_states_269_cast_fp16")]; tensor model_model_layers_5_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_5_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481076608)))]; tensor input_43_cast_fp16 = mul(x = model_model_layers_5_post_attention_layernorm_weight_to_fp16, y = hidden_states_269_cast_fp16)[name = string("input_43_cast_fp16")]; tensor model_model_layers_5_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_5_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(481078720)))]; tensor linear_39_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_5_mlp_gate_proj_weight_to_fp16, x = input_43_cast_fp16)[name = string("linear_39_cast_fp16")]; tensor var_1433_cast_fp16 = silu(x = linear_39_cast_fp16)[name = string("op_1433_cast_fp16")]; tensor model_model_layers_5_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_5_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(487370240)))]; tensor linear_40_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_5_mlp_up_proj_weight_to_fp16, x = input_43_cast_fp16)[name = string("linear_40_cast_fp16")]; tensor input_47_cast_fp16 = mul(x = var_1433_cast_fp16, y = linear_40_cast_fp16)[name = string("input_47_cast_fp16")]; tensor model_model_layers_5_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_5_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(493661760)))]; tensor linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_5_mlp_down_proj_weight_to_fp16, x = input_47_cast_fp16)[name = string("linear_41_cast_fp16")]; tensor hidden_states_275_cast_fp16 = add(x = hidden_states_265_cast_fp16, y = linear_41_cast_fp16)[name = string("hidden_states_275_cast_fp16")]; fp16 var_78_promoted_24_to_fp16 = const()[name = string("op_78_promoted_24_to_fp16"), val = fp16(0x1p+1)]; tensor var_1446_cast_fp16 = pow(x = hidden_states_275_cast_fp16, y = var_78_promoted_24_to_fp16)[name = string("op_1446_cast_fp16")]; tensor variance_49_axes_0 = const()[name = string("variance_49_axes_0"), val = tensor([-1])]; bool variance_49_keep_dims_0 = const()[name = string("variance_49_keep_dims_0"), val = bool(true)]; tensor variance_49_cast_fp16 = reduce_mean(axes = variance_49_axes_0, keep_dims = variance_49_keep_dims_0, x = var_1446_cast_fp16)[name = string("variance_49_cast_fp16")]; fp16 var_1449_to_fp16 = const()[name = string("op_1449_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1450_cast_fp16 = add(x = variance_49_cast_fp16, y = var_1449_to_fp16)[name = string("op_1450_cast_fp16")]; fp32 var_1451_epsilon_0 = const()[name = string("op_1451_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1451_cast_fp16 = rsqrt(epsilon = var_1451_epsilon_0, x = var_1450_cast_fp16)[name = string("op_1451_cast_fp16")]; tensor hidden_states_279_cast_fp16 = mul(x = hidden_states_275_cast_fp16, y = var_1451_cast_fp16)[name = string("hidden_states_279_cast_fp16")]; tensor model_model_layers_6_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(499953280)))]; tensor hidden_states_283_cast_fp16 = mul(x = model_model_layers_6_input_layernorm_weight_to_fp16, y = hidden_states_279_cast_fp16)[name = string("hidden_states_283_cast_fp16")]; tensor var_1464_shape_cast_fp16 = shape(x = hidden_states_283_cast_fp16)[name = string("op_1464_shape_cast_fp16")]; int32 gather_112 = const()[name = string("gather_112"), val = int32(1)]; int32 gather_113_axis_0 = const()[name = string("gather_113_axis_0"), val = int32(0)]; int32 gather_113_batch_dims_0 = const()[name = string("gather_113_batch_dims_0"), val = int32(0)]; bool gather_113_validate_indices_0 = const()[name = string("gather_113_validate_indices_0"), val = bool(false)]; string var_1464_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1464_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_113_to_uint16 = const()[name = string("select_113_to_uint16"), val = uint16(1)]; tensor var_1464_shape_cast_fp16_to_uint16 = cast(dtype = var_1464_shape_cast_fp16_to_uint16_dtype_0, x = var_1464_shape_cast_fp16)[name = string("cast_690")]; uint16 gather_113_cast_uint16 = gather(axis = gather_113_axis_0, batch_dims = gather_113_batch_dims_0, indices = select_113_to_uint16, validate_indices = gather_113_validate_indices_0, x = var_1464_shape_cast_fp16_to_uint16)[name = string("gather_113_cast_uint16")]; string gather_113_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_113_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_6_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_6_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(499955392)))]; tensor linear_42_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_6_self_attn_q_proj_weight_to_fp16, x = hidden_states_283_cast_fp16)[name = string("linear_42_cast_fp16")]; tensor concat_115x = const()[name = string("concat_115x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_285_cast_fp16 = reshape(shape = concat_115x, x = linear_42_cast_fp16)[name = string("hidden_states_285_cast_fp16")]; fp16 var_78_promoted_25_to_fp16 = const()[name = string("op_78_promoted_25_to_fp16"), val = fp16(0x1p+1)]; tensor var_1472_cast_fp16 = pow(x = hidden_states_285_cast_fp16, y = var_78_promoted_25_to_fp16)[name = string("op_1472_cast_fp16")]; tensor variance_51_axes_0 = const()[name = string("variance_51_axes_0"), val = tensor([-1])]; bool variance_51_keep_dims_0 = const()[name = string("variance_51_keep_dims_0"), val = bool(true)]; tensor variance_51_cast_fp16 = reduce_mean(axes = variance_51_axes_0, keep_dims = variance_51_keep_dims_0, x = var_1472_cast_fp16)[name = string("variance_51_cast_fp16")]; fp16 var_1475_to_fp16 = const()[name = string("op_1475_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1476_cast_fp16 = add(x = variance_51_cast_fp16, y = var_1475_to_fp16)[name = string("op_1476_cast_fp16")]; fp32 var_1477_epsilon_0 = const()[name = string("op_1477_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1477_cast_fp16 = rsqrt(epsilon = var_1477_epsilon_0, x = var_1476_cast_fp16)[name = string("op_1477_cast_fp16")]; tensor hidden_states_289_cast_fp16 = mul(x = hidden_states_285_cast_fp16, y = var_1477_cast_fp16)[name = string("hidden_states_289_cast_fp16")]; tensor model_model_layers_6_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_6_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504149760)))]; tensor var_1480_cast_fp16 = mul(x = model_model_layers_6_self_attn_q_norm_weight_to_fp16, y = hidden_states_289_cast_fp16)[name = string("op_1480_cast_fp16")]; tensor q_13_perm_0 = const()[name = string("q_13_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_6_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_6_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(504150080)))]; tensor linear_43_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_6_self_attn_k_proj_weight_to_fp16, x = hidden_states_283_cast_fp16)[name = string("linear_43_cast_fp16")]; tensor concat_116x = const()[name = string("concat_116x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_293_cast_fp16 = reshape(shape = concat_116x, x = linear_43_cast_fp16)[name = string("hidden_states_293_cast_fp16")]; fp16 var_78_promoted_26_to_fp16 = const()[name = string("op_78_promoted_26_to_fp16"), val = fp16(0x1p+1)]; tensor var_1488_cast_fp16 = pow(x = hidden_states_293_cast_fp16, y = var_78_promoted_26_to_fp16)[name = string("op_1488_cast_fp16")]; tensor variance_53_axes_0 = const()[name = string("variance_53_axes_0"), val = tensor([-1])]; bool variance_53_keep_dims_0 = const()[name = string("variance_53_keep_dims_0"), val = bool(true)]; tensor variance_53_cast_fp16 = reduce_mean(axes = variance_53_axes_0, keep_dims = variance_53_keep_dims_0, x = var_1488_cast_fp16)[name = string("variance_53_cast_fp16")]; fp16 var_1491_to_fp16 = const()[name = string("op_1491_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1492_cast_fp16 = add(x = variance_53_cast_fp16, y = var_1491_to_fp16)[name = string("op_1492_cast_fp16")]; fp32 var_1493_epsilon_0 = const()[name = string("op_1493_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1493_cast_fp16 = rsqrt(epsilon = var_1493_epsilon_0, x = var_1492_cast_fp16)[name = string("op_1493_cast_fp16")]; tensor hidden_states_297_cast_fp16 = mul(x = hidden_states_293_cast_fp16, y = var_1493_cast_fp16)[name = string("hidden_states_297_cast_fp16")]; tensor model_model_layers_6_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_6_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(506247296)))]; tensor var_1496_cast_fp16 = mul(x = model_model_layers_6_self_attn_k_norm_weight_to_fp16, y = hidden_states_297_cast_fp16)[name = string("op_1496_cast_fp16")]; tensor k_13_perm_0 = const()[name = string("k_13_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_6_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_6_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(506247616)))]; tensor linear_44_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_6_self_attn_v_proj_weight_to_fp16, x = hidden_states_283_cast_fp16)[name = string("linear_44_cast_fp16")]; tensor concat_117x = const()[name = string("concat_117x"), val = tensor([1, -1, 8, 128])]; tensor var_1501_cast_fp16 = reshape(shape = concat_117x, x = linear_44_cast_fp16)[name = string("op_1501_cast_fp16")]; tensor v_state_13_perm_0 = const()[name = string("v_state_13_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_13_cast_fp16 = transpose(perm = q_13_perm_0, x = var_1480_cast_fp16)[name = string("transpose_87")]; tensor var_1505_cast_fp16 = mul(x = q_13_cast_fp16, y = cos_5_cast_fp16)[name = string("op_1505_cast_fp16")]; tensor x1_25_begin_0 = const()[name = string("x1_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_25_end_0 = const()[name = string("x1_25_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_25_end_mask_0 = const()[name = string("x1_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_25_cast_fp16 = slice_by_index(begin = x1_25_begin_0, end = x1_25_end_0, end_mask = x1_25_end_mask_0, x = q_13_cast_fp16)[name = string("x1_25_cast_fp16")]; tensor x2_25_begin_0 = const()[name = string("x2_25_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_25_end_0 = const()[name = string("x2_25_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_25_end_mask_0 = const()[name = string("x2_25_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_25_cast_fp16 = slice_by_index(begin = x2_25_begin_0, end = x2_25_end_0, end_mask = x2_25_end_mask_0, x = q_13_cast_fp16)[name = string("x2_25_cast_fp16")]; fp16 const_16_promoted_to_fp16 = const()[name = string("const_16_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1516_cast_fp16 = mul(x = x2_25_cast_fp16, y = const_16_promoted_to_fp16)[name = string("op_1516_cast_fp16")]; bool var_1518_interleave_0 = const()[name = string("op_1518_interleave_0"), val = bool(false)]; tensor var_1518_cast_fp16 = concat(axis = var_72, interleave = var_1518_interleave_0, values = (var_1516_cast_fp16, x1_25_cast_fp16))[name = string("op_1518_cast_fp16")]; tensor var_1519_cast_fp16 = mul(x = var_1518_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1519_cast_fp16")]; tensor query_25_cast_fp16 = add(x = var_1505_cast_fp16, y = var_1519_cast_fp16)[name = string("query_25_cast_fp16")]; tensor k_13_cast_fp16 = transpose(perm = k_13_perm_0, x = var_1496_cast_fp16)[name = string("transpose_86")]; tensor var_1521_cast_fp16 = mul(x = k_13_cast_fp16, y = cos_5_cast_fp16)[name = string("op_1521_cast_fp16")]; tensor x1_27_begin_0 = const()[name = string("x1_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_27_end_0 = const()[name = string("x1_27_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_27_end_mask_0 = const()[name = string("x1_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_27_cast_fp16 = slice_by_index(begin = x1_27_begin_0, end = x1_27_end_0, end_mask = x1_27_end_mask_0, x = k_13_cast_fp16)[name = string("x1_27_cast_fp16")]; tensor x2_27_begin_0 = const()[name = string("x2_27_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_27_end_0 = const()[name = string("x2_27_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_27_end_mask_0 = const()[name = string("x2_27_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_27_cast_fp16 = slice_by_index(begin = x2_27_begin_0, end = x2_27_end_0, end_mask = x2_27_end_mask_0, x = k_13_cast_fp16)[name = string("x2_27_cast_fp16")]; fp16 const_17_promoted_to_fp16 = const()[name = string("const_17_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1532_cast_fp16 = mul(x = x2_27_cast_fp16, y = const_17_promoted_to_fp16)[name = string("op_1532_cast_fp16")]; bool var_1534_interleave_0 = const()[name = string("op_1534_interleave_0"), val = bool(false)]; tensor var_1534_cast_fp16 = concat(axis = var_72, interleave = var_1534_interleave_0, values = (var_1532_cast_fp16, x1_27_cast_fp16))[name = string("op_1534_cast_fp16")]; tensor var_1535_cast_fp16 = mul(x = var_1534_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1535_cast_fp16")]; tensor k_state_13_cast_fp16 = add(x = var_1521_cast_fp16, y = var_1535_cast_fp16)[name = string("k_state_13_cast_fp16")]; tensor expand_dims_72 = const()[name = string("expand_dims_72"), val = tensor([0])]; tensor expand_dims_73 = const()[name = string("expand_dims_73"), val = tensor([0])]; tensor expand_dims_75 = const()[name = string("expand_dims_75"), val = tensor([0])]; tensor concat_120_values0_0 = const()[name = string("concat_120_values0_0"), val = tensor([6])]; int32 concat_120_axis_0 = const()[name = string("concat_120_axis_0"), val = int32(0)]; bool concat_120_interleave_0 = const()[name = string("concat_120_interleave_0"), val = bool(false)]; tensor concat_120 = concat(axis = concat_120_axis_0, interleave = concat_120_interleave_0, values = (concat_120_values0_0, expand_dims_72, expand_dims_73, expand_dims_2, expand_dims_75))[name = string("concat_120")]; tensor key_cache_internal_tensor_assign_7_stride_0 = const()[name = string("key_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_120, begin_mask = key_cache_internal_tensor_assign_7_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_7_squeeze_mask_0, stride = key_cache_internal_tensor_assign_7_stride_0, update = k_state_13_cast_fp16, x = coreml_update_state_66)[name = string("key_cache_internal_tensor_assign_7_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_7_cast_fp16, input = key_cache)[name = string("coreml_update_state_68_write_state")]; tensor coreml_update_state_68 = read_state(input = key_cache)[name = string("coreml_update_state_68")]; tensor value_cache_internal_tensor_assign_7_stride_0 = const()[name = string("value_cache_internal_tensor_assign_7_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_7_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_7_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_7_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_7_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_13_cast_fp16 = transpose(perm = v_state_13_perm_0, x = var_1501_cast_fp16)[name = string("transpose_85")]; tensor value_cache_internal_tensor_assign_7_cast_fp16 = slice_update(begin = concat_120, begin_mask = value_cache_internal_tensor_assign_7_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_7_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_7_squeeze_mask_0, stride = value_cache_internal_tensor_assign_7_stride_0, update = v_state_13_cast_fp16, x = coreml_update_state_67)[name = string("value_cache_internal_tensor_assign_7_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_7_cast_fp16, input = value_cache)[name = string("coreml_update_state_69_write_state")]; tensor coreml_update_state_69 = read_state(input = value_cache)[name = string("coreml_update_state_69")]; tensor var_1558_begin_0 = const()[name = string("op_1558_begin_0"), val = tensor([6, 0, 0, 0, 0])]; tensor var_1558_end_0 = const()[name = string("op_1558_end_0"), val = tensor([7, 1, 8, 2048, 128])]; tensor var_1558_end_mask_0 = const()[name = string("op_1558_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1558_squeeze_mask_0 = const()[name = string("op_1558_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1558_cast_fp16 = slice_by_index(begin = var_1558_begin_0, end = var_1558_end_0, end_mask = var_1558_end_mask_0, squeeze_mask = var_1558_squeeze_mask_0, x = coreml_update_state_68)[name = string("op_1558_cast_fp16")]; tensor var_1561_begin_0 = const()[name = string("op_1561_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1561_end_mask_0 = const()[name = string("op_1561_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1561_cast_fp16 = slice_by_index(begin = var_1561_begin_0, end = concat_12, end_mask = var_1561_end_mask_0, x = var_1558_cast_fp16)[name = string("op_1561_cast_fp16")]; tensor var_1563_begin_0 = const()[name = string("op_1563_begin_0"), val = tensor([6, 0, 0, 0, 0])]; tensor var_1563_end_0 = const()[name = string("op_1563_end_0"), val = tensor([7, 1, 8, 2048, 128])]; tensor var_1563_end_mask_0 = const()[name = string("op_1563_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1563_squeeze_mask_0 = const()[name = string("op_1563_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1563_cast_fp16 = slice_by_index(begin = var_1563_begin_0, end = var_1563_end_0, end_mask = var_1563_end_mask_0, squeeze_mask = var_1563_squeeze_mask_0, x = coreml_update_state_69)[name = string("op_1563_cast_fp16")]; tensor var_1566_begin_0 = const()[name = string("op_1566_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1566_end_mask_0 = const()[name = string("op_1566_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1566_cast_fp16 = slice_by_index(begin = var_1566_begin_0, end = concat_12, end_mask = var_1566_end_mask_0, x = var_1563_cast_fp16)[name = string("op_1566_cast_fp16")]; tensor var_1568_shape_cast_fp16 = shape(x = var_1561_cast_fp16)[name = string("op_1568_shape_cast_fp16")]; int32 gather_121 = const()[name = string("gather_121"), val = int32(1)]; int32 gather_122 = const()[name = string("gather_122"), val = int32(8)]; int32 gather_123_axis_0 = const()[name = string("gather_123_axis_0"), val = int32(0)]; int32 gather_123_batch_dims_0 = const()[name = string("gather_123_batch_dims_0"), val = int32(0)]; bool gather_123_validate_indices_0 = const()[name = string("gather_123_validate_indices_0"), val = bool(false)]; string var_1568_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1568_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_123_to_uint16 = const()[name = string("select_123_to_uint16"), val = uint16(2)]; tensor var_1568_shape_cast_fp16_to_uint16 = cast(dtype = var_1568_shape_cast_fp16_to_uint16_dtype_0, x = var_1568_shape_cast_fp16)[name = string("cast_688")]; uint16 gather_123_cast_uint16 = gather(axis = gather_123_axis_0, batch_dims = gather_123_batch_dims_0, indices = select_123_to_uint16, validate_indices = gather_123_validate_indices_0, x = var_1568_shape_cast_fp16_to_uint16)[name = string("gather_123_cast_uint16")]; string gather_123_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_123_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_124 = const()[name = string("gather_124"), val = int32(128)]; tensor var_1575_axes_0 = const()[name = string("op_1575_axes_0"), val = tensor([2])]; tensor var_1575_cast_fp16 = expand_dims(axes = var_1575_axes_0, x = var_1561_cast_fp16)[name = string("op_1575_cast_fp16")]; int32 concat_128_axis_0 = const()[name = string("concat_128_axis_0"), val = int32(0)]; bool concat_128_interleave_0 = const()[name = string("concat_128_interleave_0"), val = bool(false)]; int32 gather_123_cast_uint16_to_int32 = cast(dtype = gather_123_cast_uint16_to_int32_dtype_0, x = gather_123_cast_uint16)[name = string("cast_687")]; tensor concat_128 = concat(axis = concat_128_axis_0, interleave = concat_128_interleave_0, values = (gather_121, gather_122, var_78, gather_123_cast_uint16_to_int32, gather_124))[name = string("concat_128")]; tensor shape_137_cast_fp16 = shape(x = var_1575_cast_fp16)[name = string("shape_137_cast_fp16")]; tensor real_div_12 = real_div(x = concat_128, y = shape_137_cast_fp16)[name = string("real_div_12")]; tensor hidden_states_303_cast_fp16 = tile(reps = real_div_12, x = var_1575_cast_fp16)[name = string("hidden_states_303_cast_fp16")]; tensor concat_129x = const()[name = string("concat_129x"), val = tensor([1, 16, -1, 128])]; tensor key_25_cast_fp16 = reshape(shape = concat_129x, x = hidden_states_303_cast_fp16)[name = string("key_25_cast_fp16")]; tensor var_1585_shape_cast_fp16 = shape(x = var_1566_cast_fp16)[name = string("op_1585_shape_cast_fp16")]; int32 gather_125 = const()[name = string("gather_125"), val = int32(1)]; int32 gather_126 = const()[name = string("gather_126"), val = int32(8)]; int32 gather_127_axis_0 = const()[name = string("gather_127_axis_0"), val = int32(0)]; int32 gather_127_batch_dims_0 = const()[name = string("gather_127_batch_dims_0"), val = int32(0)]; bool gather_127_validate_indices_0 = const()[name = string("gather_127_validate_indices_0"), val = bool(false)]; string var_1585_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1585_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_127_to_uint16 = const()[name = string("select_127_to_uint16"), val = uint16(2)]; tensor var_1585_shape_cast_fp16_to_uint16 = cast(dtype = var_1585_shape_cast_fp16_to_uint16_dtype_0, x = var_1585_shape_cast_fp16)[name = string("cast_686")]; uint16 gather_127_cast_uint16 = gather(axis = gather_127_axis_0, batch_dims = gather_127_batch_dims_0, indices = select_127_to_uint16, validate_indices = gather_127_validate_indices_0, x = var_1585_shape_cast_fp16_to_uint16)[name = string("gather_127_cast_uint16")]; string gather_127_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_127_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_128 = const()[name = string("gather_128"), val = int32(128)]; tensor var_1592_axes_0 = const()[name = string("op_1592_axes_0"), val = tensor([2])]; tensor var_1592_cast_fp16 = expand_dims(axes = var_1592_axes_0, x = var_1566_cast_fp16)[name = string("op_1592_cast_fp16")]; int32 concat_130_axis_0 = const()[name = string("concat_130_axis_0"), val = int32(0)]; bool concat_130_interleave_0 = const()[name = string("concat_130_interleave_0"), val = bool(false)]; int32 gather_127_cast_uint16_to_int32 = cast(dtype = gather_127_cast_uint16_to_int32_dtype_0, x = gather_127_cast_uint16)[name = string("cast_685")]; tensor concat_130 = concat(axis = concat_130_axis_0, interleave = concat_130_interleave_0, values = (gather_125, gather_126, var_78, gather_127_cast_uint16_to_int32, gather_128))[name = string("concat_130")]; tensor shape_142_cast_fp16 = shape(x = var_1592_cast_fp16)[name = string("shape_142_cast_fp16")]; tensor real_div_13 = real_div(x = concat_130, y = shape_142_cast_fp16)[name = string("real_div_13")]; tensor hidden_states_307_cast_fp16 = tile(reps = real_div_13, x = var_1592_cast_fp16)[name = string("hidden_states_307_cast_fp16")]; tensor concat_131x = const()[name = string("concat_131x"), val = tensor([1, 16, -1, 128])]; tensor value_25_cast_fp16 = reshape(shape = concat_131x, x = hidden_states_307_cast_fp16)[name = string("value_25_cast_fp16")]; tensor var_1602_shape_cast_fp16 = shape(x = key_25_cast_fp16)[name = string("op_1602_shape_cast_fp16")]; int32 gather_129_axis_0 = const()[name = string("gather_129_axis_0"), val = int32(0)]; int32 gather_129_batch_dims_0 = const()[name = string("gather_129_batch_dims_0"), val = int32(0)]; bool gather_129_validate_indices_0 = const()[name = string("gather_129_validate_indices_0"), val = bool(false)]; string var_1602_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1602_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_129_to_uint16 = const()[name = string("select_129_to_uint16"), val = uint16(2)]; tensor var_1602_shape_cast_fp16_to_uint16 = cast(dtype = var_1602_shape_cast_fp16_to_uint16_dtype_0, x = var_1602_shape_cast_fp16)[name = string("cast_684")]; uint16 gather_129_cast_uint16 = gather(axis = gather_129_axis_0, batch_dims = gather_129_batch_dims_0, indices = select_129_to_uint16, validate_indices = gather_129_validate_indices_0, x = var_1602_shape_cast_fp16_to_uint16)[name = string("gather_129_cast_uint16")]; string gather_129_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_129_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_132_values0_0 = const()[name = string("concat_132_values0_0"), val = int32(1)]; int32 concat_132_values1_0 = const()[name = string("concat_132_values1_0"), val = int32(1)]; int32 concat_132_values2_0 = const()[name = string("concat_132_values2_0"), val = int32(0)]; int32 concat_132_axis_0 = const()[name = string("concat_132_axis_0"), val = int32(0)]; bool concat_132_interleave_0 = const()[name = string("concat_132_interleave_0"), val = bool(false)]; int32 gather_129_cast_uint16_to_int32 = cast(dtype = gather_129_cast_uint16_to_int32_dtype_0, x = gather_129_cast_uint16)[name = string("cast_683")]; tensor concat_132 = concat(axis = concat_132_axis_0, interleave = concat_132_interleave_0, values = (concat_132_values0_0, concat_132_values1_0, concat_132_values2_0, gather_129_cast_uint16_to_int32))[name = string("concat_132")]; tensor attention_mask_13_begin_0 = const()[name = string("attention_mask_13_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_13_end_mask_0 = const()[name = string("attention_mask_13_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_13_cast_fp16 = slice_by_index(begin = attention_mask_13_begin_0, end = concat_132, end_mask = attention_mask_13_end_mask_0, x = causal_mask)[name = string("attention_mask_13_cast_fp16")]; tensor mul_6_cast_fp16 = mul(x = query_25_cast_fp16, y = var_85_to_fp16)[name = string("mul_6_cast_fp16")]; bool matmul_6_transpose_y_0 = const()[name = string("matmul_6_transpose_y_0"), val = bool(true)]; bool matmul_6_transpose_x_0 = const()[name = string("matmul_6_transpose_x_0"), val = bool(false)]; tensor matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = mul_6_cast_fp16, y = key_25_cast_fp16)[name = string("matmul_6_cast_fp16")]; tensor add_136_cast_fp16 = add(x = matmul_6_cast_fp16, y = attention_mask_13_cast_fp16)[name = string("add_136_cast_fp16")]; int32 softmax_6_axis_0 = const()[name = string("softmax_6_axis_0"), val = int32(-1)]; tensor softmax_6_cast_fp16 = softmax(axis = softmax_6_axis_0, x = add_136_cast_fp16)[name = string("softmax_6_cast_fp16")]; bool attn_output_25_transpose_x_0 = const()[name = string("attn_output_25_transpose_x_0"), val = bool(false)]; bool attn_output_25_transpose_y_0 = const()[name = string("attn_output_25_transpose_y_0"), val = bool(false)]; tensor attn_output_25_cast_fp16 = matmul(transpose_x = attn_output_25_transpose_x_0, transpose_y = attn_output_25_transpose_y_0, x = softmax_6_cast_fp16, y = value_25_cast_fp16)[name = string("attn_output_25_cast_fp16")]; tensor var_1611_perm_0 = const()[name = string("op_1611_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_133_axis_0 = const()[name = string("concat_133_axis_0"), val = int32(0)]; bool concat_133_interleave_0 = const()[name = string("concat_133_interleave_0"), val = bool(false)]; int32 gather_113_cast_uint16_to_int32 = cast(dtype = gather_113_cast_uint16_to_int32_dtype_0, x = gather_113_cast_uint16)[name = string("cast_689")]; tensor concat_133 = concat(axis = concat_133_axis_0, interleave = concat_133_interleave_0, values = (gather_112, gather_113_cast_uint16_to_int32, var_72))[name = string("concat_133")]; tensor var_1611_cast_fp16 = transpose(perm = var_1611_perm_0, x = attn_output_25_cast_fp16)[name = string("transpose_84")]; tensor var_1614_cast_fp16 = reshape(shape = concat_133, x = var_1611_cast_fp16)[name = string("op_1614_cast_fp16")]; tensor model_model_layers_6_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_6_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(508344832)))]; tensor linear_45_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_6_self_attn_o_proj_weight_to_fp16, x = var_1614_cast_fp16)[name = string("linear_45_cast_fp16")]; tensor hidden_states_311_cast_fp16 = add(x = hidden_states_275_cast_fp16, y = linear_45_cast_fp16)[name = string("hidden_states_311_cast_fp16")]; fp16 var_78_promoted_27_to_fp16 = const()[name = string("op_78_promoted_27_to_fp16"), val = fp16(0x1p+1)]; tensor var_1621_cast_fp16 = pow(x = hidden_states_311_cast_fp16, y = var_78_promoted_27_to_fp16)[name = string("op_1621_cast_fp16")]; tensor variance_55_axes_0 = const()[name = string("variance_55_axes_0"), val = tensor([-1])]; bool variance_55_keep_dims_0 = const()[name = string("variance_55_keep_dims_0"), val = bool(true)]; tensor variance_55_cast_fp16 = reduce_mean(axes = variance_55_axes_0, keep_dims = variance_55_keep_dims_0, x = var_1621_cast_fp16)[name = string("variance_55_cast_fp16")]; fp16 var_1624_to_fp16 = const()[name = string("op_1624_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1625_cast_fp16 = add(x = variance_55_cast_fp16, y = var_1624_to_fp16)[name = string("op_1625_cast_fp16")]; fp32 var_1626_epsilon_0 = const()[name = string("op_1626_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1626_cast_fp16 = rsqrt(epsilon = var_1626_epsilon_0, x = var_1625_cast_fp16)[name = string("op_1626_cast_fp16")]; tensor hidden_states_315_cast_fp16 = mul(x = hidden_states_311_cast_fp16, y = var_1626_cast_fp16)[name = string("hidden_states_315_cast_fp16")]; tensor model_model_layers_6_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_6_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512539200)))]; tensor input_51_cast_fp16 = mul(x = model_model_layers_6_post_attention_layernorm_weight_to_fp16, y = hidden_states_315_cast_fp16)[name = string("input_51_cast_fp16")]; tensor model_model_layers_6_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_6_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(512541312)))]; tensor linear_46_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_6_mlp_gate_proj_weight_to_fp16, x = input_51_cast_fp16)[name = string("linear_46_cast_fp16")]; tensor var_1638_cast_fp16 = silu(x = linear_46_cast_fp16)[name = string("op_1638_cast_fp16")]; tensor model_model_layers_6_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_6_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(518832832)))]; tensor linear_47_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_6_mlp_up_proj_weight_to_fp16, x = input_51_cast_fp16)[name = string("linear_47_cast_fp16")]; tensor input_55_cast_fp16 = mul(x = var_1638_cast_fp16, y = linear_47_cast_fp16)[name = string("input_55_cast_fp16")]; tensor model_model_layers_6_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_6_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(525124352)))]; tensor linear_48_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_6_mlp_down_proj_weight_to_fp16, x = input_55_cast_fp16)[name = string("linear_48_cast_fp16")]; tensor hidden_states_321_cast_fp16 = add(x = hidden_states_311_cast_fp16, y = linear_48_cast_fp16)[name = string("hidden_states_321_cast_fp16")]; fp16 var_78_promoted_28_to_fp16 = const()[name = string("op_78_promoted_28_to_fp16"), val = fp16(0x1p+1)]; tensor var_1651_cast_fp16 = pow(x = hidden_states_321_cast_fp16, y = var_78_promoted_28_to_fp16)[name = string("op_1651_cast_fp16")]; tensor variance_57_axes_0 = const()[name = string("variance_57_axes_0"), val = tensor([-1])]; bool variance_57_keep_dims_0 = const()[name = string("variance_57_keep_dims_0"), val = bool(true)]; tensor variance_57_cast_fp16 = reduce_mean(axes = variance_57_axes_0, keep_dims = variance_57_keep_dims_0, x = var_1651_cast_fp16)[name = string("variance_57_cast_fp16")]; fp16 var_1654_to_fp16 = const()[name = string("op_1654_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1655_cast_fp16 = add(x = variance_57_cast_fp16, y = var_1654_to_fp16)[name = string("op_1655_cast_fp16")]; fp32 var_1656_epsilon_0 = const()[name = string("op_1656_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1656_cast_fp16 = rsqrt(epsilon = var_1656_epsilon_0, x = var_1655_cast_fp16)[name = string("op_1656_cast_fp16")]; tensor hidden_states_325_cast_fp16 = mul(x = hidden_states_321_cast_fp16, y = var_1656_cast_fp16)[name = string("hidden_states_325_cast_fp16")]; tensor model_model_layers_7_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531415872)))]; tensor hidden_states_329_cast_fp16 = mul(x = model_model_layers_7_input_layernorm_weight_to_fp16, y = hidden_states_325_cast_fp16)[name = string("hidden_states_329_cast_fp16")]; tensor var_1669_shape_cast_fp16 = shape(x = hidden_states_329_cast_fp16)[name = string("op_1669_shape_cast_fp16")]; int32 gather_130 = const()[name = string("gather_130"), val = int32(1)]; int32 gather_131_axis_0 = const()[name = string("gather_131_axis_0"), val = int32(0)]; int32 gather_131_batch_dims_0 = const()[name = string("gather_131_batch_dims_0"), val = int32(0)]; bool gather_131_validate_indices_0 = const()[name = string("gather_131_validate_indices_0"), val = bool(false)]; string var_1669_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1669_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_131_to_uint16 = const()[name = string("select_131_to_uint16"), val = uint16(1)]; tensor var_1669_shape_cast_fp16_to_uint16 = cast(dtype = var_1669_shape_cast_fp16_to_uint16_dtype_0, x = var_1669_shape_cast_fp16)[name = string("cast_682")]; uint16 gather_131_cast_uint16 = gather(axis = gather_131_axis_0, batch_dims = gather_131_batch_dims_0, indices = select_131_to_uint16, validate_indices = gather_131_validate_indices_0, x = var_1669_shape_cast_fp16_to_uint16)[name = string("gather_131_cast_uint16")]; string gather_131_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_131_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_7_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_7_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(531417984)))]; tensor linear_49_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_7_self_attn_q_proj_weight_to_fp16, x = hidden_states_329_cast_fp16)[name = string("linear_49_cast_fp16")]; tensor concat_134x = const()[name = string("concat_134x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_331_cast_fp16 = reshape(shape = concat_134x, x = linear_49_cast_fp16)[name = string("hidden_states_331_cast_fp16")]; fp16 var_78_promoted_29_to_fp16 = const()[name = string("op_78_promoted_29_to_fp16"), val = fp16(0x1p+1)]; tensor var_1677_cast_fp16 = pow(x = hidden_states_331_cast_fp16, y = var_78_promoted_29_to_fp16)[name = string("op_1677_cast_fp16")]; tensor variance_59_axes_0 = const()[name = string("variance_59_axes_0"), val = tensor([-1])]; bool variance_59_keep_dims_0 = const()[name = string("variance_59_keep_dims_0"), val = bool(true)]; tensor variance_59_cast_fp16 = reduce_mean(axes = variance_59_axes_0, keep_dims = variance_59_keep_dims_0, x = var_1677_cast_fp16)[name = string("variance_59_cast_fp16")]; fp16 var_1680_to_fp16 = const()[name = string("op_1680_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1681_cast_fp16 = add(x = variance_59_cast_fp16, y = var_1680_to_fp16)[name = string("op_1681_cast_fp16")]; fp32 var_1682_epsilon_0 = const()[name = string("op_1682_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1682_cast_fp16 = rsqrt(epsilon = var_1682_epsilon_0, x = var_1681_cast_fp16)[name = string("op_1682_cast_fp16")]; tensor hidden_states_335_cast_fp16 = mul(x = hidden_states_331_cast_fp16, y = var_1682_cast_fp16)[name = string("hidden_states_335_cast_fp16")]; tensor model_model_layers_7_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_7_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535612352)))]; tensor var_1685_cast_fp16 = mul(x = model_model_layers_7_self_attn_q_norm_weight_to_fp16, y = hidden_states_335_cast_fp16)[name = string("op_1685_cast_fp16")]; tensor q_15_perm_0 = const()[name = string("q_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_7_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_7_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(535612672)))]; tensor linear_50_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_7_self_attn_k_proj_weight_to_fp16, x = hidden_states_329_cast_fp16)[name = string("linear_50_cast_fp16")]; tensor concat_135x = const()[name = string("concat_135x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_339_cast_fp16 = reshape(shape = concat_135x, x = linear_50_cast_fp16)[name = string("hidden_states_339_cast_fp16")]; fp16 var_78_promoted_30_to_fp16 = const()[name = string("op_78_promoted_30_to_fp16"), val = fp16(0x1p+1)]; tensor var_1693_cast_fp16 = pow(x = hidden_states_339_cast_fp16, y = var_78_promoted_30_to_fp16)[name = string("op_1693_cast_fp16")]; tensor variance_61_axes_0 = const()[name = string("variance_61_axes_0"), val = tensor([-1])]; bool variance_61_keep_dims_0 = const()[name = string("variance_61_keep_dims_0"), val = bool(true)]; tensor variance_61_cast_fp16 = reduce_mean(axes = variance_61_axes_0, keep_dims = variance_61_keep_dims_0, x = var_1693_cast_fp16)[name = string("variance_61_cast_fp16")]; fp16 var_1696_to_fp16 = const()[name = string("op_1696_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1697_cast_fp16 = add(x = variance_61_cast_fp16, y = var_1696_to_fp16)[name = string("op_1697_cast_fp16")]; fp32 var_1698_epsilon_0 = const()[name = string("op_1698_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1698_cast_fp16 = rsqrt(epsilon = var_1698_epsilon_0, x = var_1697_cast_fp16)[name = string("op_1698_cast_fp16")]; tensor hidden_states_343_cast_fp16 = mul(x = hidden_states_339_cast_fp16, y = var_1698_cast_fp16)[name = string("hidden_states_343_cast_fp16")]; tensor model_model_layers_7_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_7_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(537709888)))]; tensor var_1701_cast_fp16 = mul(x = model_model_layers_7_self_attn_k_norm_weight_to_fp16, y = hidden_states_343_cast_fp16)[name = string("op_1701_cast_fp16")]; tensor k_15_perm_0 = const()[name = string("k_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_7_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_7_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(537710208)))]; tensor linear_51_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_7_self_attn_v_proj_weight_to_fp16, x = hidden_states_329_cast_fp16)[name = string("linear_51_cast_fp16")]; tensor concat_136x = const()[name = string("concat_136x"), val = tensor([1, -1, 8, 128])]; tensor var_1706_cast_fp16 = reshape(shape = concat_136x, x = linear_51_cast_fp16)[name = string("op_1706_cast_fp16")]; tensor v_state_15_perm_0 = const()[name = string("v_state_15_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_15_cast_fp16 = transpose(perm = q_15_perm_0, x = var_1685_cast_fp16)[name = string("transpose_83")]; tensor var_1710_cast_fp16 = mul(x = q_15_cast_fp16, y = cos_5_cast_fp16)[name = string("op_1710_cast_fp16")]; tensor x1_29_begin_0 = const()[name = string("x1_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_29_end_0 = const()[name = string("x1_29_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_29_end_mask_0 = const()[name = string("x1_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_29_cast_fp16 = slice_by_index(begin = x1_29_begin_0, end = x1_29_end_0, end_mask = x1_29_end_mask_0, x = q_15_cast_fp16)[name = string("x1_29_cast_fp16")]; tensor x2_29_begin_0 = const()[name = string("x2_29_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_29_end_0 = const()[name = string("x2_29_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_29_end_mask_0 = const()[name = string("x2_29_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_29_cast_fp16 = slice_by_index(begin = x2_29_begin_0, end = x2_29_end_0, end_mask = x2_29_end_mask_0, x = q_15_cast_fp16)[name = string("x2_29_cast_fp16")]; fp16 const_18_promoted_to_fp16 = const()[name = string("const_18_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1721_cast_fp16 = mul(x = x2_29_cast_fp16, y = const_18_promoted_to_fp16)[name = string("op_1721_cast_fp16")]; bool var_1723_interleave_0 = const()[name = string("op_1723_interleave_0"), val = bool(false)]; tensor var_1723_cast_fp16 = concat(axis = var_72, interleave = var_1723_interleave_0, values = (var_1721_cast_fp16, x1_29_cast_fp16))[name = string("op_1723_cast_fp16")]; tensor var_1724_cast_fp16 = mul(x = var_1723_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1724_cast_fp16")]; tensor query_29_cast_fp16 = add(x = var_1710_cast_fp16, y = var_1724_cast_fp16)[name = string("query_29_cast_fp16")]; tensor k_15_cast_fp16 = transpose(perm = k_15_perm_0, x = var_1701_cast_fp16)[name = string("transpose_82")]; tensor var_1726_cast_fp16 = mul(x = k_15_cast_fp16, y = cos_5_cast_fp16)[name = string("op_1726_cast_fp16")]; tensor x1_31_begin_0 = const()[name = string("x1_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_31_end_0 = const()[name = string("x1_31_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_31_end_mask_0 = const()[name = string("x1_31_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_31_cast_fp16 = slice_by_index(begin = x1_31_begin_0, end = x1_31_end_0, end_mask = x1_31_end_mask_0, x = k_15_cast_fp16)[name = string("x1_31_cast_fp16")]; tensor x2_31_begin_0 = const()[name = string("x2_31_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_31_end_0 = const()[name = string("x2_31_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_31_end_mask_0 = const()[name = string("x2_31_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_31_cast_fp16 = slice_by_index(begin = x2_31_begin_0, end = x2_31_end_0, end_mask = x2_31_end_mask_0, x = k_15_cast_fp16)[name = string("x2_31_cast_fp16")]; fp16 const_19_promoted_to_fp16 = const()[name = string("const_19_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1737_cast_fp16 = mul(x = x2_31_cast_fp16, y = const_19_promoted_to_fp16)[name = string("op_1737_cast_fp16")]; bool var_1739_interleave_0 = const()[name = string("op_1739_interleave_0"), val = bool(false)]; tensor var_1739_cast_fp16 = concat(axis = var_72, interleave = var_1739_interleave_0, values = (var_1737_cast_fp16, x1_31_cast_fp16))[name = string("op_1739_cast_fp16")]; tensor var_1740_cast_fp16 = mul(x = var_1739_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1740_cast_fp16")]; tensor k_state_15_cast_fp16 = add(x = var_1726_cast_fp16, y = var_1740_cast_fp16)[name = string("k_state_15_cast_fp16")]; tensor expand_dims_84 = const()[name = string("expand_dims_84"), val = tensor([0])]; tensor expand_dims_85 = const()[name = string("expand_dims_85"), val = tensor([0])]; tensor expand_dims_87 = const()[name = string("expand_dims_87"), val = tensor([0])]; tensor concat_139_values0_0 = const()[name = string("concat_139_values0_0"), val = tensor([7])]; int32 concat_139_axis_0 = const()[name = string("concat_139_axis_0"), val = int32(0)]; bool concat_139_interleave_0 = const()[name = string("concat_139_interleave_0"), val = bool(false)]; tensor concat_139 = concat(axis = concat_139_axis_0, interleave = concat_139_interleave_0, values = (concat_139_values0_0, expand_dims_84, expand_dims_85, expand_dims_2, expand_dims_87))[name = string("concat_139")]; tensor key_cache_internal_tensor_assign_8_stride_0 = const()[name = string("key_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_139, begin_mask = key_cache_internal_tensor_assign_8_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_8_squeeze_mask_0, stride = key_cache_internal_tensor_assign_8_stride_0, update = k_state_15_cast_fp16, x = coreml_update_state_68)[name = string("key_cache_internal_tensor_assign_8_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_8_cast_fp16, input = key_cache)[name = string("coreml_update_state_70_write_state")]; tensor coreml_update_state_70 = read_state(input = key_cache)[name = string("coreml_update_state_70")]; tensor value_cache_internal_tensor_assign_8_stride_0 = const()[name = string("value_cache_internal_tensor_assign_8_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_8_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_8_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_8_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_8_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_15_cast_fp16 = transpose(perm = v_state_15_perm_0, x = var_1706_cast_fp16)[name = string("transpose_81")]; tensor value_cache_internal_tensor_assign_8_cast_fp16 = slice_update(begin = concat_139, begin_mask = value_cache_internal_tensor_assign_8_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_8_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_8_squeeze_mask_0, stride = value_cache_internal_tensor_assign_8_stride_0, update = v_state_15_cast_fp16, x = coreml_update_state_69)[name = string("value_cache_internal_tensor_assign_8_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_8_cast_fp16, input = value_cache)[name = string("coreml_update_state_71_write_state")]; tensor coreml_update_state_71 = read_state(input = value_cache)[name = string("coreml_update_state_71")]; tensor var_1763_begin_0 = const()[name = string("op_1763_begin_0"), val = tensor([7, 0, 0, 0, 0])]; tensor var_1763_end_0 = const()[name = string("op_1763_end_0"), val = tensor([8, 1, 8, 2048, 128])]; tensor var_1763_end_mask_0 = const()[name = string("op_1763_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1763_squeeze_mask_0 = const()[name = string("op_1763_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1763_cast_fp16 = slice_by_index(begin = var_1763_begin_0, end = var_1763_end_0, end_mask = var_1763_end_mask_0, squeeze_mask = var_1763_squeeze_mask_0, x = coreml_update_state_70)[name = string("op_1763_cast_fp16")]; tensor var_1766_begin_0 = const()[name = string("op_1766_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1766_end_mask_0 = const()[name = string("op_1766_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1766_cast_fp16 = slice_by_index(begin = var_1766_begin_0, end = concat_12, end_mask = var_1766_end_mask_0, x = var_1763_cast_fp16)[name = string("op_1766_cast_fp16")]; tensor var_1768_begin_0 = const()[name = string("op_1768_begin_0"), val = tensor([7, 0, 0, 0, 0])]; tensor var_1768_end_0 = const()[name = string("op_1768_end_0"), val = tensor([8, 1, 8, 2048, 128])]; tensor var_1768_end_mask_0 = const()[name = string("op_1768_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1768_squeeze_mask_0 = const()[name = string("op_1768_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1768_cast_fp16 = slice_by_index(begin = var_1768_begin_0, end = var_1768_end_0, end_mask = var_1768_end_mask_0, squeeze_mask = var_1768_squeeze_mask_0, x = coreml_update_state_71)[name = string("op_1768_cast_fp16")]; tensor var_1771_begin_0 = const()[name = string("op_1771_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1771_end_mask_0 = const()[name = string("op_1771_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1771_cast_fp16 = slice_by_index(begin = var_1771_begin_0, end = concat_12, end_mask = var_1771_end_mask_0, x = var_1768_cast_fp16)[name = string("op_1771_cast_fp16")]; tensor var_1773_shape_cast_fp16 = shape(x = var_1766_cast_fp16)[name = string("op_1773_shape_cast_fp16")]; int32 gather_139 = const()[name = string("gather_139"), val = int32(1)]; int32 gather_140 = const()[name = string("gather_140"), val = int32(8)]; int32 gather_141_axis_0 = const()[name = string("gather_141_axis_0"), val = int32(0)]; int32 gather_141_batch_dims_0 = const()[name = string("gather_141_batch_dims_0"), val = int32(0)]; bool gather_141_validate_indices_0 = const()[name = string("gather_141_validate_indices_0"), val = bool(false)]; string var_1773_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1773_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_141_to_uint16 = const()[name = string("select_141_to_uint16"), val = uint16(2)]; tensor var_1773_shape_cast_fp16_to_uint16 = cast(dtype = var_1773_shape_cast_fp16_to_uint16_dtype_0, x = var_1773_shape_cast_fp16)[name = string("cast_680")]; uint16 gather_141_cast_uint16 = gather(axis = gather_141_axis_0, batch_dims = gather_141_batch_dims_0, indices = select_141_to_uint16, validate_indices = gather_141_validate_indices_0, x = var_1773_shape_cast_fp16_to_uint16)[name = string("gather_141_cast_uint16")]; string gather_141_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_141_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_142 = const()[name = string("gather_142"), val = int32(128)]; tensor var_1780_axes_0 = const()[name = string("op_1780_axes_0"), val = tensor([2])]; tensor var_1780_cast_fp16 = expand_dims(axes = var_1780_axes_0, x = var_1766_cast_fp16)[name = string("op_1780_cast_fp16")]; int32 concat_147_axis_0 = const()[name = string("concat_147_axis_0"), val = int32(0)]; bool concat_147_interleave_0 = const()[name = string("concat_147_interleave_0"), val = bool(false)]; int32 gather_141_cast_uint16_to_int32 = cast(dtype = gather_141_cast_uint16_to_int32_dtype_0, x = gather_141_cast_uint16)[name = string("cast_679")]; tensor concat_147 = concat(axis = concat_147_axis_0, interleave = concat_147_interleave_0, values = (gather_139, gather_140, var_78, gather_141_cast_uint16_to_int32, gather_142))[name = string("concat_147")]; tensor shape_157_cast_fp16 = shape(x = var_1780_cast_fp16)[name = string("shape_157_cast_fp16")]; tensor real_div_14 = real_div(x = concat_147, y = shape_157_cast_fp16)[name = string("real_div_14")]; tensor hidden_states_349_cast_fp16 = tile(reps = real_div_14, x = var_1780_cast_fp16)[name = string("hidden_states_349_cast_fp16")]; tensor concat_148x = const()[name = string("concat_148x"), val = tensor([1, 16, -1, 128])]; tensor key_29_cast_fp16 = reshape(shape = concat_148x, x = hidden_states_349_cast_fp16)[name = string("key_29_cast_fp16")]; tensor var_1790_shape_cast_fp16 = shape(x = var_1771_cast_fp16)[name = string("op_1790_shape_cast_fp16")]; int32 gather_143 = const()[name = string("gather_143"), val = int32(1)]; int32 gather_144 = const()[name = string("gather_144"), val = int32(8)]; int32 gather_145_axis_0 = const()[name = string("gather_145_axis_0"), val = int32(0)]; int32 gather_145_batch_dims_0 = const()[name = string("gather_145_batch_dims_0"), val = int32(0)]; bool gather_145_validate_indices_0 = const()[name = string("gather_145_validate_indices_0"), val = bool(false)]; string var_1790_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1790_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_145_to_uint16 = const()[name = string("select_145_to_uint16"), val = uint16(2)]; tensor var_1790_shape_cast_fp16_to_uint16 = cast(dtype = var_1790_shape_cast_fp16_to_uint16_dtype_0, x = var_1790_shape_cast_fp16)[name = string("cast_678")]; uint16 gather_145_cast_uint16 = gather(axis = gather_145_axis_0, batch_dims = gather_145_batch_dims_0, indices = select_145_to_uint16, validate_indices = gather_145_validate_indices_0, x = var_1790_shape_cast_fp16_to_uint16)[name = string("gather_145_cast_uint16")]; string gather_145_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_145_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_146 = const()[name = string("gather_146"), val = int32(128)]; tensor var_1797_axes_0 = const()[name = string("op_1797_axes_0"), val = tensor([2])]; tensor var_1797_cast_fp16 = expand_dims(axes = var_1797_axes_0, x = var_1771_cast_fp16)[name = string("op_1797_cast_fp16")]; int32 concat_149_axis_0 = const()[name = string("concat_149_axis_0"), val = int32(0)]; bool concat_149_interleave_0 = const()[name = string("concat_149_interleave_0"), val = bool(false)]; int32 gather_145_cast_uint16_to_int32 = cast(dtype = gather_145_cast_uint16_to_int32_dtype_0, x = gather_145_cast_uint16)[name = string("cast_677")]; tensor concat_149 = concat(axis = concat_149_axis_0, interleave = concat_149_interleave_0, values = (gather_143, gather_144, var_78, gather_145_cast_uint16_to_int32, gather_146))[name = string("concat_149")]; tensor shape_162_cast_fp16 = shape(x = var_1797_cast_fp16)[name = string("shape_162_cast_fp16")]; tensor real_div_15 = real_div(x = concat_149, y = shape_162_cast_fp16)[name = string("real_div_15")]; tensor hidden_states_353_cast_fp16 = tile(reps = real_div_15, x = var_1797_cast_fp16)[name = string("hidden_states_353_cast_fp16")]; tensor concat_150x = const()[name = string("concat_150x"), val = tensor([1, 16, -1, 128])]; tensor value_29_cast_fp16 = reshape(shape = concat_150x, x = hidden_states_353_cast_fp16)[name = string("value_29_cast_fp16")]; tensor var_1807_shape_cast_fp16 = shape(x = key_29_cast_fp16)[name = string("op_1807_shape_cast_fp16")]; int32 gather_147_axis_0 = const()[name = string("gather_147_axis_0"), val = int32(0)]; int32 gather_147_batch_dims_0 = const()[name = string("gather_147_batch_dims_0"), val = int32(0)]; bool gather_147_validate_indices_0 = const()[name = string("gather_147_validate_indices_0"), val = bool(false)]; string var_1807_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1807_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_147_to_uint16 = const()[name = string("select_147_to_uint16"), val = uint16(2)]; tensor var_1807_shape_cast_fp16_to_uint16 = cast(dtype = var_1807_shape_cast_fp16_to_uint16_dtype_0, x = var_1807_shape_cast_fp16)[name = string("cast_676")]; uint16 gather_147_cast_uint16 = gather(axis = gather_147_axis_0, batch_dims = gather_147_batch_dims_0, indices = select_147_to_uint16, validate_indices = gather_147_validate_indices_0, x = var_1807_shape_cast_fp16_to_uint16)[name = string("gather_147_cast_uint16")]; string gather_147_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_147_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_151_values0_0 = const()[name = string("concat_151_values0_0"), val = int32(1)]; int32 concat_151_values1_0 = const()[name = string("concat_151_values1_0"), val = int32(1)]; int32 concat_151_values2_0 = const()[name = string("concat_151_values2_0"), val = int32(0)]; int32 concat_151_axis_0 = const()[name = string("concat_151_axis_0"), val = int32(0)]; bool concat_151_interleave_0 = const()[name = string("concat_151_interleave_0"), val = bool(false)]; int32 gather_147_cast_uint16_to_int32 = cast(dtype = gather_147_cast_uint16_to_int32_dtype_0, x = gather_147_cast_uint16)[name = string("cast_675")]; tensor concat_151 = concat(axis = concat_151_axis_0, interleave = concat_151_interleave_0, values = (concat_151_values0_0, concat_151_values1_0, concat_151_values2_0, gather_147_cast_uint16_to_int32))[name = string("concat_151")]; tensor attention_mask_15_begin_0 = const()[name = string("attention_mask_15_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_15_end_mask_0 = const()[name = string("attention_mask_15_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_15_cast_fp16 = slice_by_index(begin = attention_mask_15_begin_0, end = concat_151, end_mask = attention_mask_15_end_mask_0, x = causal_mask)[name = string("attention_mask_15_cast_fp16")]; tensor mul_7_cast_fp16 = mul(x = query_29_cast_fp16, y = var_85_to_fp16)[name = string("mul_7_cast_fp16")]; bool matmul_7_transpose_y_0 = const()[name = string("matmul_7_transpose_y_0"), val = bool(true)]; bool matmul_7_transpose_x_0 = const()[name = string("matmul_7_transpose_x_0"), val = bool(false)]; tensor matmul_7_cast_fp16 = matmul(transpose_x = matmul_7_transpose_x_0, transpose_y = matmul_7_transpose_y_0, x = mul_7_cast_fp16, y = key_29_cast_fp16)[name = string("matmul_7_cast_fp16")]; tensor add_155_cast_fp16 = add(x = matmul_7_cast_fp16, y = attention_mask_15_cast_fp16)[name = string("add_155_cast_fp16")]; int32 softmax_7_axis_0 = const()[name = string("softmax_7_axis_0"), val = int32(-1)]; tensor softmax_7_cast_fp16 = softmax(axis = softmax_7_axis_0, x = add_155_cast_fp16)[name = string("softmax_7_cast_fp16")]; bool attn_output_29_transpose_x_0 = const()[name = string("attn_output_29_transpose_x_0"), val = bool(false)]; bool attn_output_29_transpose_y_0 = const()[name = string("attn_output_29_transpose_y_0"), val = bool(false)]; tensor attn_output_29_cast_fp16 = matmul(transpose_x = attn_output_29_transpose_x_0, transpose_y = attn_output_29_transpose_y_0, x = softmax_7_cast_fp16, y = value_29_cast_fp16)[name = string("attn_output_29_cast_fp16")]; tensor var_1816_perm_0 = const()[name = string("op_1816_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_152_axis_0 = const()[name = string("concat_152_axis_0"), val = int32(0)]; bool concat_152_interleave_0 = const()[name = string("concat_152_interleave_0"), val = bool(false)]; int32 gather_131_cast_uint16_to_int32 = cast(dtype = gather_131_cast_uint16_to_int32_dtype_0, x = gather_131_cast_uint16)[name = string("cast_681")]; tensor concat_152 = concat(axis = concat_152_axis_0, interleave = concat_152_interleave_0, values = (gather_130, gather_131_cast_uint16_to_int32, var_72))[name = string("concat_152")]; tensor var_1816_cast_fp16 = transpose(perm = var_1816_perm_0, x = attn_output_29_cast_fp16)[name = string("transpose_80")]; tensor var_1819_cast_fp16 = reshape(shape = concat_152, x = var_1816_cast_fp16)[name = string("op_1819_cast_fp16")]; tensor model_model_layers_7_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_7_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(539807424)))]; tensor linear_52_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_7_self_attn_o_proj_weight_to_fp16, x = var_1819_cast_fp16)[name = string("linear_52_cast_fp16")]; tensor hidden_states_357_cast_fp16 = add(x = hidden_states_321_cast_fp16, y = linear_52_cast_fp16)[name = string("hidden_states_357_cast_fp16")]; fp16 var_78_promoted_31_to_fp16 = const()[name = string("op_78_promoted_31_to_fp16"), val = fp16(0x1p+1)]; tensor var_1826_cast_fp16 = pow(x = hidden_states_357_cast_fp16, y = var_78_promoted_31_to_fp16)[name = string("op_1826_cast_fp16")]; tensor variance_63_axes_0 = const()[name = string("variance_63_axes_0"), val = tensor([-1])]; bool variance_63_keep_dims_0 = const()[name = string("variance_63_keep_dims_0"), val = bool(true)]; tensor variance_63_cast_fp16 = reduce_mean(axes = variance_63_axes_0, keep_dims = variance_63_keep_dims_0, x = var_1826_cast_fp16)[name = string("variance_63_cast_fp16")]; fp16 var_1829_to_fp16 = const()[name = string("op_1829_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1830_cast_fp16 = add(x = variance_63_cast_fp16, y = var_1829_to_fp16)[name = string("op_1830_cast_fp16")]; fp32 var_1831_epsilon_0 = const()[name = string("op_1831_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1831_cast_fp16 = rsqrt(epsilon = var_1831_epsilon_0, x = var_1830_cast_fp16)[name = string("op_1831_cast_fp16")]; tensor hidden_states_361_cast_fp16 = mul(x = hidden_states_357_cast_fp16, y = var_1831_cast_fp16)[name = string("hidden_states_361_cast_fp16")]; tensor model_model_layers_7_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_7_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(544001792)))]; tensor input_59_cast_fp16 = mul(x = model_model_layers_7_post_attention_layernorm_weight_to_fp16, y = hidden_states_361_cast_fp16)[name = string("input_59_cast_fp16")]; tensor model_model_layers_7_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_7_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(544003904)))]; tensor linear_53_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_7_mlp_gate_proj_weight_to_fp16, x = input_59_cast_fp16)[name = string("linear_53_cast_fp16")]; tensor var_1843_cast_fp16 = silu(x = linear_53_cast_fp16)[name = string("op_1843_cast_fp16")]; tensor model_model_layers_7_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_7_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(550295424)))]; tensor linear_54_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_7_mlp_up_proj_weight_to_fp16, x = input_59_cast_fp16)[name = string("linear_54_cast_fp16")]; tensor input_63_cast_fp16 = mul(x = var_1843_cast_fp16, y = linear_54_cast_fp16)[name = string("input_63_cast_fp16")]; tensor model_model_layers_7_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_7_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(556586944)))]; tensor linear_55_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_7_mlp_down_proj_weight_to_fp16, x = input_63_cast_fp16)[name = string("linear_55_cast_fp16")]; tensor hidden_states_367_cast_fp16 = add(x = hidden_states_357_cast_fp16, y = linear_55_cast_fp16)[name = string("hidden_states_367_cast_fp16")]; fp16 var_78_promoted_32_to_fp16 = const()[name = string("op_78_promoted_32_to_fp16"), val = fp16(0x1p+1)]; tensor var_1856_cast_fp16 = pow(x = hidden_states_367_cast_fp16, y = var_78_promoted_32_to_fp16)[name = string("op_1856_cast_fp16")]; tensor variance_65_axes_0 = const()[name = string("variance_65_axes_0"), val = tensor([-1])]; bool variance_65_keep_dims_0 = const()[name = string("variance_65_keep_dims_0"), val = bool(true)]; tensor variance_65_cast_fp16 = reduce_mean(axes = variance_65_axes_0, keep_dims = variance_65_keep_dims_0, x = var_1856_cast_fp16)[name = string("variance_65_cast_fp16")]; fp16 var_1859_to_fp16 = const()[name = string("op_1859_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1860_cast_fp16 = add(x = variance_65_cast_fp16, y = var_1859_to_fp16)[name = string("op_1860_cast_fp16")]; fp32 var_1861_epsilon_0 = const()[name = string("op_1861_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1861_cast_fp16 = rsqrt(epsilon = var_1861_epsilon_0, x = var_1860_cast_fp16)[name = string("op_1861_cast_fp16")]; tensor hidden_states_371_cast_fp16 = mul(x = hidden_states_367_cast_fp16, y = var_1861_cast_fp16)[name = string("hidden_states_371_cast_fp16")]; tensor model_model_layers_8_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562878464)))]; tensor hidden_states_375_cast_fp16 = mul(x = model_model_layers_8_input_layernorm_weight_to_fp16, y = hidden_states_371_cast_fp16)[name = string("hidden_states_375_cast_fp16")]; tensor var_1874_shape_cast_fp16 = shape(x = hidden_states_375_cast_fp16)[name = string("op_1874_shape_cast_fp16")]; int32 gather_148 = const()[name = string("gather_148"), val = int32(1)]; int32 gather_149_axis_0 = const()[name = string("gather_149_axis_0"), val = int32(0)]; int32 gather_149_batch_dims_0 = const()[name = string("gather_149_batch_dims_0"), val = int32(0)]; bool gather_149_validate_indices_0 = const()[name = string("gather_149_validate_indices_0"), val = bool(false)]; string var_1874_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1874_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_149_to_uint16 = const()[name = string("select_149_to_uint16"), val = uint16(1)]; tensor var_1874_shape_cast_fp16_to_uint16 = cast(dtype = var_1874_shape_cast_fp16_to_uint16_dtype_0, x = var_1874_shape_cast_fp16)[name = string("cast_674")]; uint16 gather_149_cast_uint16 = gather(axis = gather_149_axis_0, batch_dims = gather_149_batch_dims_0, indices = select_149_to_uint16, validate_indices = gather_149_validate_indices_0, x = var_1874_shape_cast_fp16_to_uint16)[name = string("gather_149_cast_uint16")]; string gather_149_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_149_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_8_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_8_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(562880576)))]; tensor linear_56_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_8_self_attn_q_proj_weight_to_fp16, x = hidden_states_375_cast_fp16)[name = string("linear_56_cast_fp16")]; tensor concat_153x = const()[name = string("concat_153x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_377_cast_fp16 = reshape(shape = concat_153x, x = linear_56_cast_fp16)[name = string("hidden_states_377_cast_fp16")]; fp16 var_78_promoted_33_to_fp16 = const()[name = string("op_78_promoted_33_to_fp16"), val = fp16(0x1p+1)]; tensor var_1882_cast_fp16 = pow(x = hidden_states_377_cast_fp16, y = var_78_promoted_33_to_fp16)[name = string("op_1882_cast_fp16")]; tensor variance_67_axes_0 = const()[name = string("variance_67_axes_0"), val = tensor([-1])]; bool variance_67_keep_dims_0 = const()[name = string("variance_67_keep_dims_0"), val = bool(true)]; tensor variance_67_cast_fp16 = reduce_mean(axes = variance_67_axes_0, keep_dims = variance_67_keep_dims_0, x = var_1882_cast_fp16)[name = string("variance_67_cast_fp16")]; fp16 var_1885_to_fp16 = const()[name = string("op_1885_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1886_cast_fp16 = add(x = variance_67_cast_fp16, y = var_1885_to_fp16)[name = string("op_1886_cast_fp16")]; fp32 var_1887_epsilon_0 = const()[name = string("op_1887_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1887_cast_fp16 = rsqrt(epsilon = var_1887_epsilon_0, x = var_1886_cast_fp16)[name = string("op_1887_cast_fp16")]; tensor hidden_states_381_cast_fp16 = mul(x = hidden_states_377_cast_fp16, y = var_1887_cast_fp16)[name = string("hidden_states_381_cast_fp16")]; tensor model_model_layers_8_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_8_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(567074944)))]; tensor var_1890_cast_fp16 = mul(x = model_model_layers_8_self_attn_q_norm_weight_to_fp16, y = hidden_states_381_cast_fp16)[name = string("op_1890_cast_fp16")]; tensor q_17_perm_0 = const()[name = string("q_17_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_8_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_8_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(567075264)))]; tensor linear_57_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_8_self_attn_k_proj_weight_to_fp16, x = hidden_states_375_cast_fp16)[name = string("linear_57_cast_fp16")]; tensor concat_154x = const()[name = string("concat_154x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_385_cast_fp16 = reshape(shape = concat_154x, x = linear_57_cast_fp16)[name = string("hidden_states_385_cast_fp16")]; fp16 var_78_promoted_34_to_fp16 = const()[name = string("op_78_promoted_34_to_fp16"), val = fp16(0x1p+1)]; tensor var_1898_cast_fp16 = pow(x = hidden_states_385_cast_fp16, y = var_78_promoted_34_to_fp16)[name = string("op_1898_cast_fp16")]; tensor variance_69_axes_0 = const()[name = string("variance_69_axes_0"), val = tensor([-1])]; bool variance_69_keep_dims_0 = const()[name = string("variance_69_keep_dims_0"), val = bool(true)]; tensor variance_69_cast_fp16 = reduce_mean(axes = variance_69_axes_0, keep_dims = variance_69_keep_dims_0, x = var_1898_cast_fp16)[name = string("variance_69_cast_fp16")]; fp16 var_1901_to_fp16 = const()[name = string("op_1901_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_1902_cast_fp16 = add(x = variance_69_cast_fp16, y = var_1901_to_fp16)[name = string("op_1902_cast_fp16")]; fp32 var_1903_epsilon_0 = const()[name = string("op_1903_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_1903_cast_fp16 = rsqrt(epsilon = var_1903_epsilon_0, x = var_1902_cast_fp16)[name = string("op_1903_cast_fp16")]; tensor hidden_states_389_cast_fp16 = mul(x = hidden_states_385_cast_fp16, y = var_1903_cast_fp16)[name = string("hidden_states_389_cast_fp16")]; tensor model_model_layers_8_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_8_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(569172480)))]; tensor var_1906_cast_fp16 = mul(x = model_model_layers_8_self_attn_k_norm_weight_to_fp16, y = hidden_states_389_cast_fp16)[name = string("op_1906_cast_fp16")]; tensor k_17_perm_0 = const()[name = string("k_17_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_8_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_8_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(569172800)))]; tensor linear_58_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_8_self_attn_v_proj_weight_to_fp16, x = hidden_states_375_cast_fp16)[name = string("linear_58_cast_fp16")]; tensor concat_155x = const()[name = string("concat_155x"), val = tensor([1, -1, 8, 128])]; tensor var_1911_cast_fp16 = reshape(shape = concat_155x, x = linear_58_cast_fp16)[name = string("op_1911_cast_fp16")]; tensor v_state_17_perm_0 = const()[name = string("v_state_17_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_17_cast_fp16 = transpose(perm = q_17_perm_0, x = var_1890_cast_fp16)[name = string("transpose_79")]; tensor var_1915_cast_fp16 = mul(x = q_17_cast_fp16, y = cos_5_cast_fp16)[name = string("op_1915_cast_fp16")]; tensor x1_33_begin_0 = const()[name = string("x1_33_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_33_end_0 = const()[name = string("x1_33_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_33_end_mask_0 = const()[name = string("x1_33_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_33_cast_fp16 = slice_by_index(begin = x1_33_begin_0, end = x1_33_end_0, end_mask = x1_33_end_mask_0, x = q_17_cast_fp16)[name = string("x1_33_cast_fp16")]; tensor x2_33_begin_0 = const()[name = string("x2_33_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_33_end_0 = const()[name = string("x2_33_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_33_end_mask_0 = const()[name = string("x2_33_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_33_cast_fp16 = slice_by_index(begin = x2_33_begin_0, end = x2_33_end_0, end_mask = x2_33_end_mask_0, x = q_17_cast_fp16)[name = string("x2_33_cast_fp16")]; fp16 const_20_promoted_to_fp16 = const()[name = string("const_20_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1926_cast_fp16 = mul(x = x2_33_cast_fp16, y = const_20_promoted_to_fp16)[name = string("op_1926_cast_fp16")]; bool var_1928_interleave_0 = const()[name = string("op_1928_interleave_0"), val = bool(false)]; tensor var_1928_cast_fp16 = concat(axis = var_72, interleave = var_1928_interleave_0, values = (var_1926_cast_fp16, x1_33_cast_fp16))[name = string("op_1928_cast_fp16")]; tensor var_1929_cast_fp16 = mul(x = var_1928_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1929_cast_fp16")]; tensor query_33_cast_fp16 = add(x = var_1915_cast_fp16, y = var_1929_cast_fp16)[name = string("query_33_cast_fp16")]; tensor k_17_cast_fp16 = transpose(perm = k_17_perm_0, x = var_1906_cast_fp16)[name = string("transpose_78")]; tensor var_1931_cast_fp16 = mul(x = k_17_cast_fp16, y = cos_5_cast_fp16)[name = string("op_1931_cast_fp16")]; tensor x1_35_begin_0 = const()[name = string("x1_35_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_35_end_0 = const()[name = string("x1_35_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_35_end_mask_0 = const()[name = string("x1_35_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_35_cast_fp16 = slice_by_index(begin = x1_35_begin_0, end = x1_35_end_0, end_mask = x1_35_end_mask_0, x = k_17_cast_fp16)[name = string("x1_35_cast_fp16")]; tensor x2_35_begin_0 = const()[name = string("x2_35_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_35_end_0 = const()[name = string("x2_35_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_35_end_mask_0 = const()[name = string("x2_35_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_35_cast_fp16 = slice_by_index(begin = x2_35_begin_0, end = x2_35_end_0, end_mask = x2_35_end_mask_0, x = k_17_cast_fp16)[name = string("x2_35_cast_fp16")]; fp16 const_21_promoted_to_fp16 = const()[name = string("const_21_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_1942_cast_fp16 = mul(x = x2_35_cast_fp16, y = const_21_promoted_to_fp16)[name = string("op_1942_cast_fp16")]; bool var_1944_interleave_0 = const()[name = string("op_1944_interleave_0"), val = bool(false)]; tensor var_1944_cast_fp16 = concat(axis = var_72, interleave = var_1944_interleave_0, values = (var_1942_cast_fp16, x1_35_cast_fp16))[name = string("op_1944_cast_fp16")]; tensor var_1945_cast_fp16 = mul(x = var_1944_cast_fp16, y = sin_5_cast_fp16)[name = string("op_1945_cast_fp16")]; tensor k_state_17_cast_fp16 = add(x = var_1931_cast_fp16, y = var_1945_cast_fp16)[name = string("k_state_17_cast_fp16")]; tensor expand_dims_96 = const()[name = string("expand_dims_96"), val = tensor([0])]; tensor expand_dims_97 = const()[name = string("expand_dims_97"), val = tensor([0])]; tensor expand_dims_99 = const()[name = string("expand_dims_99"), val = tensor([0])]; tensor concat_158_values0_0 = const()[name = string("concat_158_values0_0"), val = tensor([8])]; int32 concat_158_axis_0 = const()[name = string("concat_158_axis_0"), val = int32(0)]; bool concat_158_interleave_0 = const()[name = string("concat_158_interleave_0"), val = bool(false)]; tensor concat_158 = concat(axis = concat_158_axis_0, interleave = concat_158_interleave_0, values = (concat_158_values0_0, expand_dims_96, expand_dims_97, expand_dims_2, expand_dims_99))[name = string("concat_158")]; tensor key_cache_internal_tensor_assign_9_stride_0 = const()[name = string("key_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_158, begin_mask = key_cache_internal_tensor_assign_9_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_9_squeeze_mask_0, stride = key_cache_internal_tensor_assign_9_stride_0, update = k_state_17_cast_fp16, x = coreml_update_state_70)[name = string("key_cache_internal_tensor_assign_9_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_9_cast_fp16, input = key_cache)[name = string("coreml_update_state_72_write_state")]; tensor coreml_update_state_72 = read_state(input = key_cache)[name = string("coreml_update_state_72")]; tensor value_cache_internal_tensor_assign_9_stride_0 = const()[name = string("value_cache_internal_tensor_assign_9_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_9_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_9_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_9_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_9_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_17_cast_fp16 = transpose(perm = v_state_17_perm_0, x = var_1911_cast_fp16)[name = string("transpose_77")]; tensor value_cache_internal_tensor_assign_9_cast_fp16 = slice_update(begin = concat_158, begin_mask = value_cache_internal_tensor_assign_9_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_9_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_9_squeeze_mask_0, stride = value_cache_internal_tensor_assign_9_stride_0, update = v_state_17_cast_fp16, x = coreml_update_state_71)[name = string("value_cache_internal_tensor_assign_9_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_9_cast_fp16, input = value_cache)[name = string("coreml_update_state_73_write_state")]; tensor coreml_update_state_73 = read_state(input = value_cache)[name = string("coreml_update_state_73")]; tensor var_1968_begin_0 = const()[name = string("op_1968_begin_0"), val = tensor([8, 0, 0, 0, 0])]; tensor var_1968_end_0 = const()[name = string("op_1968_end_0"), val = tensor([9, 1, 8, 2048, 128])]; tensor var_1968_end_mask_0 = const()[name = string("op_1968_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1968_squeeze_mask_0 = const()[name = string("op_1968_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1968_cast_fp16 = slice_by_index(begin = var_1968_begin_0, end = var_1968_end_0, end_mask = var_1968_end_mask_0, squeeze_mask = var_1968_squeeze_mask_0, x = coreml_update_state_72)[name = string("op_1968_cast_fp16")]; tensor var_1971_begin_0 = const()[name = string("op_1971_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1971_end_mask_0 = const()[name = string("op_1971_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1971_cast_fp16 = slice_by_index(begin = var_1971_begin_0, end = concat_12, end_mask = var_1971_end_mask_0, x = var_1968_cast_fp16)[name = string("op_1971_cast_fp16")]; tensor var_1973_begin_0 = const()[name = string("op_1973_begin_0"), val = tensor([8, 0, 0, 0, 0])]; tensor var_1973_end_0 = const()[name = string("op_1973_end_0"), val = tensor([9, 1, 8, 2048, 128])]; tensor var_1973_end_mask_0 = const()[name = string("op_1973_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_1973_squeeze_mask_0 = const()[name = string("op_1973_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_1973_cast_fp16 = slice_by_index(begin = var_1973_begin_0, end = var_1973_end_0, end_mask = var_1973_end_mask_0, squeeze_mask = var_1973_squeeze_mask_0, x = coreml_update_state_73)[name = string("op_1973_cast_fp16")]; tensor var_1976_begin_0 = const()[name = string("op_1976_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_1976_end_mask_0 = const()[name = string("op_1976_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_1976_cast_fp16 = slice_by_index(begin = var_1976_begin_0, end = concat_12, end_mask = var_1976_end_mask_0, x = var_1973_cast_fp16)[name = string("op_1976_cast_fp16")]; tensor var_1978_shape_cast_fp16 = shape(x = var_1971_cast_fp16)[name = string("op_1978_shape_cast_fp16")]; int32 gather_157 = const()[name = string("gather_157"), val = int32(1)]; int32 gather_158 = const()[name = string("gather_158"), val = int32(8)]; int32 gather_159_axis_0 = const()[name = string("gather_159_axis_0"), val = int32(0)]; int32 gather_159_batch_dims_0 = const()[name = string("gather_159_batch_dims_0"), val = int32(0)]; bool gather_159_validate_indices_0 = const()[name = string("gather_159_validate_indices_0"), val = bool(false)]; string var_1978_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1978_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_159_to_uint16 = const()[name = string("select_159_to_uint16"), val = uint16(2)]; tensor var_1978_shape_cast_fp16_to_uint16 = cast(dtype = var_1978_shape_cast_fp16_to_uint16_dtype_0, x = var_1978_shape_cast_fp16)[name = string("cast_672")]; uint16 gather_159_cast_uint16 = gather(axis = gather_159_axis_0, batch_dims = gather_159_batch_dims_0, indices = select_159_to_uint16, validate_indices = gather_159_validate_indices_0, x = var_1978_shape_cast_fp16_to_uint16)[name = string("gather_159_cast_uint16")]; string gather_159_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_159_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_160 = const()[name = string("gather_160"), val = int32(128)]; tensor var_1985_axes_0 = const()[name = string("op_1985_axes_0"), val = tensor([2])]; tensor var_1985_cast_fp16 = expand_dims(axes = var_1985_axes_0, x = var_1971_cast_fp16)[name = string("op_1985_cast_fp16")]; int32 concat_166_axis_0 = const()[name = string("concat_166_axis_0"), val = int32(0)]; bool concat_166_interleave_0 = const()[name = string("concat_166_interleave_0"), val = bool(false)]; int32 gather_159_cast_uint16_to_int32 = cast(dtype = gather_159_cast_uint16_to_int32_dtype_0, x = gather_159_cast_uint16)[name = string("cast_671")]; tensor concat_166 = concat(axis = concat_166_axis_0, interleave = concat_166_interleave_0, values = (gather_157, gather_158, var_78, gather_159_cast_uint16_to_int32, gather_160))[name = string("concat_166")]; tensor shape_177_cast_fp16 = shape(x = var_1985_cast_fp16)[name = string("shape_177_cast_fp16")]; tensor real_div_16 = real_div(x = concat_166, y = shape_177_cast_fp16)[name = string("real_div_16")]; tensor hidden_states_395_cast_fp16 = tile(reps = real_div_16, x = var_1985_cast_fp16)[name = string("hidden_states_395_cast_fp16")]; tensor concat_167x = const()[name = string("concat_167x"), val = tensor([1, 16, -1, 128])]; tensor key_33_cast_fp16 = reshape(shape = concat_167x, x = hidden_states_395_cast_fp16)[name = string("key_33_cast_fp16")]; tensor var_1995_shape_cast_fp16 = shape(x = var_1976_cast_fp16)[name = string("op_1995_shape_cast_fp16")]; int32 gather_161 = const()[name = string("gather_161"), val = int32(1)]; int32 gather_162 = const()[name = string("gather_162"), val = int32(8)]; int32 gather_163_axis_0 = const()[name = string("gather_163_axis_0"), val = int32(0)]; int32 gather_163_batch_dims_0 = const()[name = string("gather_163_batch_dims_0"), val = int32(0)]; bool gather_163_validate_indices_0 = const()[name = string("gather_163_validate_indices_0"), val = bool(false)]; string var_1995_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_1995_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_163_to_uint16 = const()[name = string("select_163_to_uint16"), val = uint16(2)]; tensor var_1995_shape_cast_fp16_to_uint16 = cast(dtype = var_1995_shape_cast_fp16_to_uint16_dtype_0, x = var_1995_shape_cast_fp16)[name = string("cast_670")]; uint16 gather_163_cast_uint16 = gather(axis = gather_163_axis_0, batch_dims = gather_163_batch_dims_0, indices = select_163_to_uint16, validate_indices = gather_163_validate_indices_0, x = var_1995_shape_cast_fp16_to_uint16)[name = string("gather_163_cast_uint16")]; string gather_163_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_163_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_164 = const()[name = string("gather_164"), val = int32(128)]; tensor var_2002_axes_0 = const()[name = string("op_2002_axes_0"), val = tensor([2])]; tensor var_2002_cast_fp16 = expand_dims(axes = var_2002_axes_0, x = var_1976_cast_fp16)[name = string("op_2002_cast_fp16")]; int32 concat_168_axis_0 = const()[name = string("concat_168_axis_0"), val = int32(0)]; bool concat_168_interleave_0 = const()[name = string("concat_168_interleave_0"), val = bool(false)]; int32 gather_163_cast_uint16_to_int32 = cast(dtype = gather_163_cast_uint16_to_int32_dtype_0, x = gather_163_cast_uint16)[name = string("cast_669")]; tensor concat_168 = concat(axis = concat_168_axis_0, interleave = concat_168_interleave_0, values = (gather_161, gather_162, var_78, gather_163_cast_uint16_to_int32, gather_164))[name = string("concat_168")]; tensor shape_182_cast_fp16 = shape(x = var_2002_cast_fp16)[name = string("shape_182_cast_fp16")]; tensor real_div_17 = real_div(x = concat_168, y = shape_182_cast_fp16)[name = string("real_div_17")]; tensor hidden_states_399_cast_fp16 = tile(reps = real_div_17, x = var_2002_cast_fp16)[name = string("hidden_states_399_cast_fp16")]; tensor concat_169x = const()[name = string("concat_169x"), val = tensor([1, 16, -1, 128])]; tensor value_33_cast_fp16 = reshape(shape = concat_169x, x = hidden_states_399_cast_fp16)[name = string("value_33_cast_fp16")]; tensor var_2012_shape_cast_fp16 = shape(x = key_33_cast_fp16)[name = string("op_2012_shape_cast_fp16")]; int32 gather_165_axis_0 = const()[name = string("gather_165_axis_0"), val = int32(0)]; int32 gather_165_batch_dims_0 = const()[name = string("gather_165_batch_dims_0"), val = int32(0)]; bool gather_165_validate_indices_0 = const()[name = string("gather_165_validate_indices_0"), val = bool(false)]; string var_2012_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2012_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_165_to_uint16 = const()[name = string("select_165_to_uint16"), val = uint16(2)]; tensor var_2012_shape_cast_fp16_to_uint16 = cast(dtype = var_2012_shape_cast_fp16_to_uint16_dtype_0, x = var_2012_shape_cast_fp16)[name = string("cast_668")]; uint16 gather_165_cast_uint16 = gather(axis = gather_165_axis_0, batch_dims = gather_165_batch_dims_0, indices = select_165_to_uint16, validate_indices = gather_165_validate_indices_0, x = var_2012_shape_cast_fp16_to_uint16)[name = string("gather_165_cast_uint16")]; string gather_165_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_165_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_170_values0_0 = const()[name = string("concat_170_values0_0"), val = int32(1)]; int32 concat_170_values1_0 = const()[name = string("concat_170_values1_0"), val = int32(1)]; int32 concat_170_values2_0 = const()[name = string("concat_170_values2_0"), val = int32(0)]; int32 concat_170_axis_0 = const()[name = string("concat_170_axis_0"), val = int32(0)]; bool concat_170_interleave_0 = const()[name = string("concat_170_interleave_0"), val = bool(false)]; int32 gather_165_cast_uint16_to_int32 = cast(dtype = gather_165_cast_uint16_to_int32_dtype_0, x = gather_165_cast_uint16)[name = string("cast_667")]; tensor concat_170 = concat(axis = concat_170_axis_0, interleave = concat_170_interleave_0, values = (concat_170_values0_0, concat_170_values1_0, concat_170_values2_0, gather_165_cast_uint16_to_int32))[name = string("concat_170")]; tensor attention_mask_17_begin_0 = const()[name = string("attention_mask_17_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_17_end_mask_0 = const()[name = string("attention_mask_17_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_17_cast_fp16 = slice_by_index(begin = attention_mask_17_begin_0, end = concat_170, end_mask = attention_mask_17_end_mask_0, x = causal_mask)[name = string("attention_mask_17_cast_fp16")]; tensor mul_8_cast_fp16 = mul(x = query_33_cast_fp16, y = var_85_to_fp16)[name = string("mul_8_cast_fp16")]; bool matmul_8_transpose_y_0 = const()[name = string("matmul_8_transpose_y_0"), val = bool(true)]; bool matmul_8_transpose_x_0 = const()[name = string("matmul_8_transpose_x_0"), val = bool(false)]; tensor matmul_8_cast_fp16 = matmul(transpose_x = matmul_8_transpose_x_0, transpose_y = matmul_8_transpose_y_0, x = mul_8_cast_fp16, y = key_33_cast_fp16)[name = string("matmul_8_cast_fp16")]; tensor add_174_cast_fp16 = add(x = matmul_8_cast_fp16, y = attention_mask_17_cast_fp16)[name = string("add_174_cast_fp16")]; int32 softmax_8_axis_0 = const()[name = string("softmax_8_axis_0"), val = int32(-1)]; tensor softmax_8_cast_fp16 = softmax(axis = softmax_8_axis_0, x = add_174_cast_fp16)[name = string("softmax_8_cast_fp16")]; bool attn_output_33_transpose_x_0 = const()[name = string("attn_output_33_transpose_x_0"), val = bool(false)]; bool attn_output_33_transpose_y_0 = const()[name = string("attn_output_33_transpose_y_0"), val = bool(false)]; tensor attn_output_33_cast_fp16 = matmul(transpose_x = attn_output_33_transpose_x_0, transpose_y = attn_output_33_transpose_y_0, x = softmax_8_cast_fp16, y = value_33_cast_fp16)[name = string("attn_output_33_cast_fp16")]; tensor var_2021_perm_0 = const()[name = string("op_2021_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_171_axis_0 = const()[name = string("concat_171_axis_0"), val = int32(0)]; bool concat_171_interleave_0 = const()[name = string("concat_171_interleave_0"), val = bool(false)]; int32 gather_149_cast_uint16_to_int32 = cast(dtype = gather_149_cast_uint16_to_int32_dtype_0, x = gather_149_cast_uint16)[name = string("cast_673")]; tensor concat_171 = concat(axis = concat_171_axis_0, interleave = concat_171_interleave_0, values = (gather_148, gather_149_cast_uint16_to_int32, var_72))[name = string("concat_171")]; tensor var_2021_cast_fp16 = transpose(perm = var_2021_perm_0, x = attn_output_33_cast_fp16)[name = string("transpose_76")]; tensor var_2024_cast_fp16 = reshape(shape = concat_171, x = var_2021_cast_fp16)[name = string("op_2024_cast_fp16")]; tensor model_model_layers_8_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_8_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(571270016)))]; tensor linear_59_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_8_self_attn_o_proj_weight_to_fp16, x = var_2024_cast_fp16)[name = string("linear_59_cast_fp16")]; tensor hidden_states_403_cast_fp16 = add(x = hidden_states_367_cast_fp16, y = linear_59_cast_fp16)[name = string("hidden_states_403_cast_fp16")]; fp16 var_78_promoted_35_to_fp16 = const()[name = string("op_78_promoted_35_to_fp16"), val = fp16(0x1p+1)]; tensor var_2031_cast_fp16 = pow(x = hidden_states_403_cast_fp16, y = var_78_promoted_35_to_fp16)[name = string("op_2031_cast_fp16")]; tensor variance_71_axes_0 = const()[name = string("variance_71_axes_0"), val = tensor([-1])]; bool variance_71_keep_dims_0 = const()[name = string("variance_71_keep_dims_0"), val = bool(true)]; tensor variance_71_cast_fp16 = reduce_mean(axes = variance_71_axes_0, keep_dims = variance_71_keep_dims_0, x = var_2031_cast_fp16)[name = string("variance_71_cast_fp16")]; fp16 var_2034_to_fp16 = const()[name = string("op_2034_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2035_cast_fp16 = add(x = variance_71_cast_fp16, y = var_2034_to_fp16)[name = string("op_2035_cast_fp16")]; fp32 var_2036_epsilon_0 = const()[name = string("op_2036_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2036_cast_fp16 = rsqrt(epsilon = var_2036_epsilon_0, x = var_2035_cast_fp16)[name = string("op_2036_cast_fp16")]; tensor hidden_states_407_cast_fp16 = mul(x = hidden_states_403_cast_fp16, y = var_2036_cast_fp16)[name = string("hidden_states_407_cast_fp16")]; tensor model_model_layers_8_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_8_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(575464384)))]; tensor input_67_cast_fp16 = mul(x = model_model_layers_8_post_attention_layernorm_weight_to_fp16, y = hidden_states_407_cast_fp16)[name = string("input_67_cast_fp16")]; tensor model_model_layers_8_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_8_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(575466496)))]; tensor linear_60_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_8_mlp_gate_proj_weight_to_fp16, x = input_67_cast_fp16)[name = string("linear_60_cast_fp16")]; tensor var_2048_cast_fp16 = silu(x = linear_60_cast_fp16)[name = string("op_2048_cast_fp16")]; tensor model_model_layers_8_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_8_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(581758016)))]; tensor linear_61_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_8_mlp_up_proj_weight_to_fp16, x = input_67_cast_fp16)[name = string("linear_61_cast_fp16")]; tensor input_71_cast_fp16 = mul(x = var_2048_cast_fp16, y = linear_61_cast_fp16)[name = string("input_71_cast_fp16")]; tensor model_model_layers_8_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_8_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(588049536)))]; tensor linear_62_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_8_mlp_down_proj_weight_to_fp16, x = input_71_cast_fp16)[name = string("linear_62_cast_fp16")]; tensor hidden_states_413_cast_fp16 = add(x = hidden_states_403_cast_fp16, y = linear_62_cast_fp16)[name = string("hidden_states_413_cast_fp16")]; fp16 var_78_promoted_36_to_fp16 = const()[name = string("op_78_promoted_36_to_fp16"), val = fp16(0x1p+1)]; tensor var_2061_cast_fp16 = pow(x = hidden_states_413_cast_fp16, y = var_78_promoted_36_to_fp16)[name = string("op_2061_cast_fp16")]; tensor variance_73_axes_0 = const()[name = string("variance_73_axes_0"), val = tensor([-1])]; bool variance_73_keep_dims_0 = const()[name = string("variance_73_keep_dims_0"), val = bool(true)]; tensor variance_73_cast_fp16 = reduce_mean(axes = variance_73_axes_0, keep_dims = variance_73_keep_dims_0, x = var_2061_cast_fp16)[name = string("variance_73_cast_fp16")]; fp16 var_2064_to_fp16 = const()[name = string("op_2064_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2065_cast_fp16 = add(x = variance_73_cast_fp16, y = var_2064_to_fp16)[name = string("op_2065_cast_fp16")]; fp32 var_2066_epsilon_0 = const()[name = string("op_2066_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2066_cast_fp16 = rsqrt(epsilon = var_2066_epsilon_0, x = var_2065_cast_fp16)[name = string("op_2066_cast_fp16")]; tensor hidden_states_417_cast_fp16 = mul(x = hidden_states_413_cast_fp16, y = var_2066_cast_fp16)[name = string("hidden_states_417_cast_fp16")]; tensor model_model_layers_9_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594341056)))]; tensor hidden_states_421_cast_fp16 = mul(x = model_model_layers_9_input_layernorm_weight_to_fp16, y = hidden_states_417_cast_fp16)[name = string("hidden_states_421_cast_fp16")]; tensor var_2079_shape_cast_fp16 = shape(x = hidden_states_421_cast_fp16)[name = string("op_2079_shape_cast_fp16")]; int32 gather_166 = const()[name = string("gather_166"), val = int32(1)]; int32 gather_167_axis_0 = const()[name = string("gather_167_axis_0"), val = int32(0)]; int32 gather_167_batch_dims_0 = const()[name = string("gather_167_batch_dims_0"), val = int32(0)]; bool gather_167_validate_indices_0 = const()[name = string("gather_167_validate_indices_0"), val = bool(false)]; string var_2079_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2079_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_167_to_uint16 = const()[name = string("select_167_to_uint16"), val = uint16(1)]; tensor var_2079_shape_cast_fp16_to_uint16 = cast(dtype = var_2079_shape_cast_fp16_to_uint16_dtype_0, x = var_2079_shape_cast_fp16)[name = string("cast_666")]; uint16 gather_167_cast_uint16 = gather(axis = gather_167_axis_0, batch_dims = gather_167_batch_dims_0, indices = select_167_to_uint16, validate_indices = gather_167_validate_indices_0, x = var_2079_shape_cast_fp16_to_uint16)[name = string("gather_167_cast_uint16")]; string gather_167_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_167_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_9_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_9_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(594343168)))]; tensor linear_63_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_9_self_attn_q_proj_weight_to_fp16, x = hidden_states_421_cast_fp16)[name = string("linear_63_cast_fp16")]; tensor concat_172x = const()[name = string("concat_172x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_423_cast_fp16 = reshape(shape = concat_172x, x = linear_63_cast_fp16)[name = string("hidden_states_423_cast_fp16")]; fp16 var_78_promoted_37_to_fp16 = const()[name = string("op_78_promoted_37_to_fp16"), val = fp16(0x1p+1)]; tensor var_2087_cast_fp16 = pow(x = hidden_states_423_cast_fp16, y = var_78_promoted_37_to_fp16)[name = string("op_2087_cast_fp16")]; tensor variance_75_axes_0 = const()[name = string("variance_75_axes_0"), val = tensor([-1])]; bool variance_75_keep_dims_0 = const()[name = string("variance_75_keep_dims_0"), val = bool(true)]; tensor variance_75_cast_fp16 = reduce_mean(axes = variance_75_axes_0, keep_dims = variance_75_keep_dims_0, x = var_2087_cast_fp16)[name = string("variance_75_cast_fp16")]; fp16 var_2090_to_fp16 = const()[name = string("op_2090_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2091_cast_fp16 = add(x = variance_75_cast_fp16, y = var_2090_to_fp16)[name = string("op_2091_cast_fp16")]; fp32 var_2092_epsilon_0 = const()[name = string("op_2092_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2092_cast_fp16 = rsqrt(epsilon = var_2092_epsilon_0, x = var_2091_cast_fp16)[name = string("op_2092_cast_fp16")]; tensor hidden_states_427_cast_fp16 = mul(x = hidden_states_423_cast_fp16, y = var_2092_cast_fp16)[name = string("hidden_states_427_cast_fp16")]; tensor model_model_layers_9_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_9_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(598537536)))]; tensor var_2095_cast_fp16 = mul(x = model_model_layers_9_self_attn_q_norm_weight_to_fp16, y = hidden_states_427_cast_fp16)[name = string("op_2095_cast_fp16")]; tensor q_19_perm_0 = const()[name = string("q_19_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_9_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_9_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(598537856)))]; tensor linear_64_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_9_self_attn_k_proj_weight_to_fp16, x = hidden_states_421_cast_fp16)[name = string("linear_64_cast_fp16")]; tensor concat_173x = const()[name = string("concat_173x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_431_cast_fp16 = reshape(shape = concat_173x, x = linear_64_cast_fp16)[name = string("hidden_states_431_cast_fp16")]; fp16 var_78_promoted_38_to_fp16 = const()[name = string("op_78_promoted_38_to_fp16"), val = fp16(0x1p+1)]; tensor var_2103_cast_fp16 = pow(x = hidden_states_431_cast_fp16, y = var_78_promoted_38_to_fp16)[name = string("op_2103_cast_fp16")]; tensor variance_77_axes_0 = const()[name = string("variance_77_axes_0"), val = tensor([-1])]; bool variance_77_keep_dims_0 = const()[name = string("variance_77_keep_dims_0"), val = bool(true)]; tensor variance_77_cast_fp16 = reduce_mean(axes = variance_77_axes_0, keep_dims = variance_77_keep_dims_0, x = var_2103_cast_fp16)[name = string("variance_77_cast_fp16")]; fp16 var_2106_to_fp16 = const()[name = string("op_2106_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2107_cast_fp16 = add(x = variance_77_cast_fp16, y = var_2106_to_fp16)[name = string("op_2107_cast_fp16")]; fp32 var_2108_epsilon_0 = const()[name = string("op_2108_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2108_cast_fp16 = rsqrt(epsilon = var_2108_epsilon_0, x = var_2107_cast_fp16)[name = string("op_2108_cast_fp16")]; tensor hidden_states_435_cast_fp16 = mul(x = hidden_states_431_cast_fp16, y = var_2108_cast_fp16)[name = string("hidden_states_435_cast_fp16")]; tensor model_model_layers_9_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_9_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600635072)))]; tensor var_2111_cast_fp16 = mul(x = model_model_layers_9_self_attn_k_norm_weight_to_fp16, y = hidden_states_435_cast_fp16)[name = string("op_2111_cast_fp16")]; tensor k_19_perm_0 = const()[name = string("k_19_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_9_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_9_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(600635392)))]; tensor linear_65_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_9_self_attn_v_proj_weight_to_fp16, x = hidden_states_421_cast_fp16)[name = string("linear_65_cast_fp16")]; tensor concat_174x = const()[name = string("concat_174x"), val = tensor([1, -1, 8, 128])]; tensor var_2116_cast_fp16 = reshape(shape = concat_174x, x = linear_65_cast_fp16)[name = string("op_2116_cast_fp16")]; tensor v_state_19_perm_0 = const()[name = string("v_state_19_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_19_cast_fp16 = transpose(perm = q_19_perm_0, x = var_2095_cast_fp16)[name = string("transpose_75")]; tensor var_2120_cast_fp16 = mul(x = q_19_cast_fp16, y = cos_5_cast_fp16)[name = string("op_2120_cast_fp16")]; tensor x1_37_begin_0 = const()[name = string("x1_37_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_37_end_0 = const()[name = string("x1_37_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_37_end_mask_0 = const()[name = string("x1_37_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_37_cast_fp16 = slice_by_index(begin = x1_37_begin_0, end = x1_37_end_0, end_mask = x1_37_end_mask_0, x = q_19_cast_fp16)[name = string("x1_37_cast_fp16")]; tensor x2_37_begin_0 = const()[name = string("x2_37_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_37_end_0 = const()[name = string("x2_37_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_37_end_mask_0 = const()[name = string("x2_37_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_37_cast_fp16 = slice_by_index(begin = x2_37_begin_0, end = x2_37_end_0, end_mask = x2_37_end_mask_0, x = q_19_cast_fp16)[name = string("x2_37_cast_fp16")]; fp16 const_22_promoted_to_fp16 = const()[name = string("const_22_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2131_cast_fp16 = mul(x = x2_37_cast_fp16, y = const_22_promoted_to_fp16)[name = string("op_2131_cast_fp16")]; bool var_2133_interleave_0 = const()[name = string("op_2133_interleave_0"), val = bool(false)]; tensor var_2133_cast_fp16 = concat(axis = var_72, interleave = var_2133_interleave_0, values = (var_2131_cast_fp16, x1_37_cast_fp16))[name = string("op_2133_cast_fp16")]; tensor var_2134_cast_fp16 = mul(x = var_2133_cast_fp16, y = sin_5_cast_fp16)[name = string("op_2134_cast_fp16")]; tensor query_37_cast_fp16 = add(x = var_2120_cast_fp16, y = var_2134_cast_fp16)[name = string("query_37_cast_fp16")]; tensor k_19_cast_fp16 = transpose(perm = k_19_perm_0, x = var_2111_cast_fp16)[name = string("transpose_74")]; tensor var_2136_cast_fp16 = mul(x = k_19_cast_fp16, y = cos_5_cast_fp16)[name = string("op_2136_cast_fp16")]; tensor x1_39_begin_0 = const()[name = string("x1_39_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_39_end_0 = const()[name = string("x1_39_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_39_end_mask_0 = const()[name = string("x1_39_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_39_cast_fp16 = slice_by_index(begin = x1_39_begin_0, end = x1_39_end_0, end_mask = x1_39_end_mask_0, x = k_19_cast_fp16)[name = string("x1_39_cast_fp16")]; tensor x2_39_begin_0 = const()[name = string("x2_39_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_39_end_0 = const()[name = string("x2_39_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_39_end_mask_0 = const()[name = string("x2_39_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_39_cast_fp16 = slice_by_index(begin = x2_39_begin_0, end = x2_39_end_0, end_mask = x2_39_end_mask_0, x = k_19_cast_fp16)[name = string("x2_39_cast_fp16")]; fp16 const_23_promoted_to_fp16 = const()[name = string("const_23_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2147_cast_fp16 = mul(x = x2_39_cast_fp16, y = const_23_promoted_to_fp16)[name = string("op_2147_cast_fp16")]; bool var_2149_interleave_0 = const()[name = string("op_2149_interleave_0"), val = bool(false)]; tensor var_2149_cast_fp16 = concat(axis = var_72, interleave = var_2149_interleave_0, values = (var_2147_cast_fp16, x1_39_cast_fp16))[name = string("op_2149_cast_fp16")]; tensor var_2150_cast_fp16 = mul(x = var_2149_cast_fp16, y = sin_5_cast_fp16)[name = string("op_2150_cast_fp16")]; tensor k_state_19_cast_fp16 = add(x = var_2136_cast_fp16, y = var_2150_cast_fp16)[name = string("k_state_19_cast_fp16")]; tensor expand_dims_108 = const()[name = string("expand_dims_108"), val = tensor([0])]; tensor expand_dims_109 = const()[name = string("expand_dims_109"), val = tensor([0])]; tensor expand_dims_111 = const()[name = string("expand_dims_111"), val = tensor([0])]; tensor concat_177_values0_0 = const()[name = string("concat_177_values0_0"), val = tensor([9])]; int32 concat_177_axis_0 = const()[name = string("concat_177_axis_0"), val = int32(0)]; bool concat_177_interleave_0 = const()[name = string("concat_177_interleave_0"), val = bool(false)]; tensor concat_177 = concat(axis = concat_177_axis_0, interleave = concat_177_interleave_0, values = (concat_177_values0_0, expand_dims_108, expand_dims_109, expand_dims_2, expand_dims_111))[name = string("concat_177")]; tensor key_cache_internal_tensor_assign_10_stride_0 = const()[name = string("key_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_177, begin_mask = key_cache_internal_tensor_assign_10_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_10_squeeze_mask_0, stride = key_cache_internal_tensor_assign_10_stride_0, update = k_state_19_cast_fp16, x = coreml_update_state_72)[name = string("key_cache_internal_tensor_assign_10_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_10_cast_fp16, input = key_cache)[name = string("coreml_update_state_74_write_state")]; tensor coreml_update_state_74 = read_state(input = key_cache)[name = string("coreml_update_state_74")]; tensor value_cache_internal_tensor_assign_10_stride_0 = const()[name = string("value_cache_internal_tensor_assign_10_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_10_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_10_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_10_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_10_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_19_cast_fp16 = transpose(perm = v_state_19_perm_0, x = var_2116_cast_fp16)[name = string("transpose_73")]; tensor value_cache_internal_tensor_assign_10_cast_fp16 = slice_update(begin = concat_177, begin_mask = value_cache_internal_tensor_assign_10_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_10_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_10_squeeze_mask_0, stride = value_cache_internal_tensor_assign_10_stride_0, update = v_state_19_cast_fp16, x = coreml_update_state_73)[name = string("value_cache_internal_tensor_assign_10_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_10_cast_fp16, input = value_cache)[name = string("coreml_update_state_75_write_state")]; tensor coreml_update_state_75 = read_state(input = value_cache)[name = string("coreml_update_state_75")]; tensor var_2173_begin_0 = const()[name = string("op_2173_begin_0"), val = tensor([9, 0, 0, 0, 0])]; tensor var_2173_end_0 = const()[name = string("op_2173_end_0"), val = tensor([10, 1, 8, 2048, 128])]; tensor var_2173_end_mask_0 = const()[name = string("op_2173_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2173_squeeze_mask_0 = const()[name = string("op_2173_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2173_cast_fp16 = slice_by_index(begin = var_2173_begin_0, end = var_2173_end_0, end_mask = var_2173_end_mask_0, squeeze_mask = var_2173_squeeze_mask_0, x = coreml_update_state_74)[name = string("op_2173_cast_fp16")]; tensor var_2176_begin_0 = const()[name = string("op_2176_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2176_end_mask_0 = const()[name = string("op_2176_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2176_cast_fp16 = slice_by_index(begin = var_2176_begin_0, end = concat_12, end_mask = var_2176_end_mask_0, x = var_2173_cast_fp16)[name = string("op_2176_cast_fp16")]; tensor var_2178_begin_0 = const()[name = string("op_2178_begin_0"), val = tensor([9, 0, 0, 0, 0])]; tensor var_2178_end_0 = const()[name = string("op_2178_end_0"), val = tensor([10, 1, 8, 2048, 128])]; tensor var_2178_end_mask_0 = const()[name = string("op_2178_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2178_squeeze_mask_0 = const()[name = string("op_2178_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2178_cast_fp16 = slice_by_index(begin = var_2178_begin_0, end = var_2178_end_0, end_mask = var_2178_end_mask_0, squeeze_mask = var_2178_squeeze_mask_0, x = coreml_update_state_75)[name = string("op_2178_cast_fp16")]; tensor var_2181_begin_0 = const()[name = string("op_2181_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2181_end_mask_0 = const()[name = string("op_2181_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2181_cast_fp16 = slice_by_index(begin = var_2181_begin_0, end = concat_12, end_mask = var_2181_end_mask_0, x = var_2178_cast_fp16)[name = string("op_2181_cast_fp16")]; tensor var_2183_shape_cast_fp16 = shape(x = var_2176_cast_fp16)[name = string("op_2183_shape_cast_fp16")]; int32 gather_175 = const()[name = string("gather_175"), val = int32(1)]; int32 gather_176 = const()[name = string("gather_176"), val = int32(8)]; int32 gather_177_axis_0 = const()[name = string("gather_177_axis_0"), val = int32(0)]; int32 gather_177_batch_dims_0 = const()[name = string("gather_177_batch_dims_0"), val = int32(0)]; bool gather_177_validate_indices_0 = const()[name = string("gather_177_validate_indices_0"), val = bool(false)]; string var_2183_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2183_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_177_to_uint16 = const()[name = string("select_177_to_uint16"), val = uint16(2)]; tensor var_2183_shape_cast_fp16_to_uint16 = cast(dtype = var_2183_shape_cast_fp16_to_uint16_dtype_0, x = var_2183_shape_cast_fp16)[name = string("cast_664")]; uint16 gather_177_cast_uint16 = gather(axis = gather_177_axis_0, batch_dims = gather_177_batch_dims_0, indices = select_177_to_uint16, validate_indices = gather_177_validate_indices_0, x = var_2183_shape_cast_fp16_to_uint16)[name = string("gather_177_cast_uint16")]; string gather_177_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_177_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_178 = const()[name = string("gather_178"), val = int32(128)]; tensor var_2190_axes_0 = const()[name = string("op_2190_axes_0"), val = tensor([2])]; tensor var_2190_cast_fp16 = expand_dims(axes = var_2190_axes_0, x = var_2176_cast_fp16)[name = string("op_2190_cast_fp16")]; int32 concat_185_axis_0 = const()[name = string("concat_185_axis_0"), val = int32(0)]; bool concat_185_interleave_0 = const()[name = string("concat_185_interleave_0"), val = bool(false)]; int32 gather_177_cast_uint16_to_int32 = cast(dtype = gather_177_cast_uint16_to_int32_dtype_0, x = gather_177_cast_uint16)[name = string("cast_663")]; tensor concat_185 = concat(axis = concat_185_axis_0, interleave = concat_185_interleave_0, values = (gather_175, gather_176, var_78, gather_177_cast_uint16_to_int32, gather_178))[name = string("concat_185")]; tensor shape_197_cast_fp16 = shape(x = var_2190_cast_fp16)[name = string("shape_197_cast_fp16")]; tensor real_div_18 = real_div(x = concat_185, y = shape_197_cast_fp16)[name = string("real_div_18")]; tensor hidden_states_441_cast_fp16 = tile(reps = real_div_18, x = var_2190_cast_fp16)[name = string("hidden_states_441_cast_fp16")]; tensor concat_186x = const()[name = string("concat_186x"), val = tensor([1, 16, -1, 128])]; tensor key_37_cast_fp16 = reshape(shape = concat_186x, x = hidden_states_441_cast_fp16)[name = string("key_37_cast_fp16")]; tensor var_2200_shape_cast_fp16 = shape(x = var_2181_cast_fp16)[name = string("op_2200_shape_cast_fp16")]; int32 gather_179 = const()[name = string("gather_179"), val = int32(1)]; int32 gather_180 = const()[name = string("gather_180"), val = int32(8)]; int32 gather_181_axis_0 = const()[name = string("gather_181_axis_0"), val = int32(0)]; int32 gather_181_batch_dims_0 = const()[name = string("gather_181_batch_dims_0"), val = int32(0)]; bool gather_181_validate_indices_0 = const()[name = string("gather_181_validate_indices_0"), val = bool(false)]; string var_2200_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2200_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_181_to_uint16 = const()[name = string("select_181_to_uint16"), val = uint16(2)]; tensor var_2200_shape_cast_fp16_to_uint16 = cast(dtype = var_2200_shape_cast_fp16_to_uint16_dtype_0, x = var_2200_shape_cast_fp16)[name = string("cast_662")]; uint16 gather_181_cast_uint16 = gather(axis = gather_181_axis_0, batch_dims = gather_181_batch_dims_0, indices = select_181_to_uint16, validate_indices = gather_181_validate_indices_0, x = var_2200_shape_cast_fp16_to_uint16)[name = string("gather_181_cast_uint16")]; string gather_181_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_181_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_182 = const()[name = string("gather_182"), val = int32(128)]; tensor var_2207_axes_0 = const()[name = string("op_2207_axes_0"), val = tensor([2])]; tensor var_2207_cast_fp16 = expand_dims(axes = var_2207_axes_0, x = var_2181_cast_fp16)[name = string("op_2207_cast_fp16")]; int32 concat_187_axis_0 = const()[name = string("concat_187_axis_0"), val = int32(0)]; bool concat_187_interleave_0 = const()[name = string("concat_187_interleave_0"), val = bool(false)]; int32 gather_181_cast_uint16_to_int32 = cast(dtype = gather_181_cast_uint16_to_int32_dtype_0, x = gather_181_cast_uint16)[name = string("cast_661")]; tensor concat_187 = concat(axis = concat_187_axis_0, interleave = concat_187_interleave_0, values = (gather_179, gather_180, var_78, gather_181_cast_uint16_to_int32, gather_182))[name = string("concat_187")]; tensor shape_202_cast_fp16 = shape(x = var_2207_cast_fp16)[name = string("shape_202_cast_fp16")]; tensor real_div_19 = real_div(x = concat_187, y = shape_202_cast_fp16)[name = string("real_div_19")]; tensor hidden_states_445_cast_fp16 = tile(reps = real_div_19, x = var_2207_cast_fp16)[name = string("hidden_states_445_cast_fp16")]; tensor concat_188x = const()[name = string("concat_188x"), val = tensor([1, 16, -1, 128])]; tensor value_37_cast_fp16 = reshape(shape = concat_188x, x = hidden_states_445_cast_fp16)[name = string("value_37_cast_fp16")]; tensor var_2217_shape_cast_fp16 = shape(x = key_37_cast_fp16)[name = string("op_2217_shape_cast_fp16")]; int32 gather_183_axis_0 = const()[name = string("gather_183_axis_0"), val = int32(0)]; int32 gather_183_batch_dims_0 = const()[name = string("gather_183_batch_dims_0"), val = int32(0)]; bool gather_183_validate_indices_0 = const()[name = string("gather_183_validate_indices_0"), val = bool(false)]; string var_2217_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2217_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_183_to_uint16 = const()[name = string("select_183_to_uint16"), val = uint16(2)]; tensor var_2217_shape_cast_fp16_to_uint16 = cast(dtype = var_2217_shape_cast_fp16_to_uint16_dtype_0, x = var_2217_shape_cast_fp16)[name = string("cast_660")]; uint16 gather_183_cast_uint16 = gather(axis = gather_183_axis_0, batch_dims = gather_183_batch_dims_0, indices = select_183_to_uint16, validate_indices = gather_183_validate_indices_0, x = var_2217_shape_cast_fp16_to_uint16)[name = string("gather_183_cast_uint16")]; string gather_183_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_183_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_189_values0_0 = const()[name = string("concat_189_values0_0"), val = int32(1)]; int32 concat_189_values1_0 = const()[name = string("concat_189_values1_0"), val = int32(1)]; int32 concat_189_values2_0 = const()[name = string("concat_189_values2_0"), val = int32(0)]; int32 concat_189_axis_0 = const()[name = string("concat_189_axis_0"), val = int32(0)]; bool concat_189_interleave_0 = const()[name = string("concat_189_interleave_0"), val = bool(false)]; int32 gather_183_cast_uint16_to_int32 = cast(dtype = gather_183_cast_uint16_to_int32_dtype_0, x = gather_183_cast_uint16)[name = string("cast_659")]; tensor concat_189 = concat(axis = concat_189_axis_0, interleave = concat_189_interleave_0, values = (concat_189_values0_0, concat_189_values1_0, concat_189_values2_0, gather_183_cast_uint16_to_int32))[name = string("concat_189")]; tensor attention_mask_19_begin_0 = const()[name = string("attention_mask_19_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_19_end_mask_0 = const()[name = string("attention_mask_19_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_19_cast_fp16 = slice_by_index(begin = attention_mask_19_begin_0, end = concat_189, end_mask = attention_mask_19_end_mask_0, x = causal_mask)[name = string("attention_mask_19_cast_fp16")]; tensor mul_9_cast_fp16 = mul(x = query_37_cast_fp16, y = var_85_to_fp16)[name = string("mul_9_cast_fp16")]; bool matmul_9_transpose_y_0 = const()[name = string("matmul_9_transpose_y_0"), val = bool(true)]; bool matmul_9_transpose_x_0 = const()[name = string("matmul_9_transpose_x_0"), val = bool(false)]; tensor matmul_9_cast_fp16 = matmul(transpose_x = matmul_9_transpose_x_0, transpose_y = matmul_9_transpose_y_0, x = mul_9_cast_fp16, y = key_37_cast_fp16)[name = string("matmul_9_cast_fp16")]; tensor add_193_cast_fp16 = add(x = matmul_9_cast_fp16, y = attention_mask_19_cast_fp16)[name = string("add_193_cast_fp16")]; int32 softmax_9_axis_0 = const()[name = string("softmax_9_axis_0"), val = int32(-1)]; tensor softmax_9_cast_fp16 = softmax(axis = softmax_9_axis_0, x = add_193_cast_fp16)[name = string("softmax_9_cast_fp16")]; bool attn_output_37_transpose_x_0 = const()[name = string("attn_output_37_transpose_x_0"), val = bool(false)]; bool attn_output_37_transpose_y_0 = const()[name = string("attn_output_37_transpose_y_0"), val = bool(false)]; tensor attn_output_37_cast_fp16 = matmul(transpose_x = attn_output_37_transpose_x_0, transpose_y = attn_output_37_transpose_y_0, x = softmax_9_cast_fp16, y = value_37_cast_fp16)[name = string("attn_output_37_cast_fp16")]; tensor var_2226_perm_0 = const()[name = string("op_2226_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_190_axis_0 = const()[name = string("concat_190_axis_0"), val = int32(0)]; bool concat_190_interleave_0 = const()[name = string("concat_190_interleave_0"), val = bool(false)]; int32 gather_167_cast_uint16_to_int32 = cast(dtype = gather_167_cast_uint16_to_int32_dtype_0, x = gather_167_cast_uint16)[name = string("cast_665")]; tensor concat_190 = concat(axis = concat_190_axis_0, interleave = concat_190_interleave_0, values = (gather_166, gather_167_cast_uint16_to_int32, var_72))[name = string("concat_190")]; tensor var_2226_cast_fp16 = transpose(perm = var_2226_perm_0, x = attn_output_37_cast_fp16)[name = string("transpose_72")]; tensor var_2229_cast_fp16 = reshape(shape = concat_190, x = var_2226_cast_fp16)[name = string("op_2229_cast_fp16")]; tensor model_model_layers_9_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_9_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(602732608)))]; tensor linear_66_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_9_self_attn_o_proj_weight_to_fp16, x = var_2229_cast_fp16)[name = string("linear_66_cast_fp16")]; tensor hidden_states_449_cast_fp16 = add(x = hidden_states_413_cast_fp16, y = linear_66_cast_fp16)[name = string("hidden_states_449_cast_fp16")]; fp16 var_78_promoted_39_to_fp16 = const()[name = string("op_78_promoted_39_to_fp16"), val = fp16(0x1p+1)]; tensor var_2236_cast_fp16 = pow(x = hidden_states_449_cast_fp16, y = var_78_promoted_39_to_fp16)[name = string("op_2236_cast_fp16")]; tensor variance_79_axes_0 = const()[name = string("variance_79_axes_0"), val = tensor([-1])]; bool variance_79_keep_dims_0 = const()[name = string("variance_79_keep_dims_0"), val = bool(true)]; tensor variance_79_cast_fp16 = reduce_mean(axes = variance_79_axes_0, keep_dims = variance_79_keep_dims_0, x = var_2236_cast_fp16)[name = string("variance_79_cast_fp16")]; fp16 var_2239_to_fp16 = const()[name = string("op_2239_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2240_cast_fp16 = add(x = variance_79_cast_fp16, y = var_2239_to_fp16)[name = string("op_2240_cast_fp16")]; fp32 var_2241_epsilon_0 = const()[name = string("op_2241_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2241_cast_fp16 = rsqrt(epsilon = var_2241_epsilon_0, x = var_2240_cast_fp16)[name = string("op_2241_cast_fp16")]; tensor hidden_states_453_cast_fp16 = mul(x = hidden_states_449_cast_fp16, y = var_2241_cast_fp16)[name = string("hidden_states_453_cast_fp16")]; tensor model_model_layers_9_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_9_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606926976)))]; tensor input_75_cast_fp16 = mul(x = model_model_layers_9_post_attention_layernorm_weight_to_fp16, y = hidden_states_453_cast_fp16)[name = string("input_75_cast_fp16")]; tensor model_model_layers_9_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_9_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(606929088)))]; tensor linear_67_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_9_mlp_gate_proj_weight_to_fp16, x = input_75_cast_fp16)[name = string("linear_67_cast_fp16")]; tensor var_2253_cast_fp16 = silu(x = linear_67_cast_fp16)[name = string("op_2253_cast_fp16")]; tensor model_model_layers_9_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_9_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(613220608)))]; tensor linear_68_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_9_mlp_up_proj_weight_to_fp16, x = input_75_cast_fp16)[name = string("linear_68_cast_fp16")]; tensor input_79_cast_fp16 = mul(x = var_2253_cast_fp16, y = linear_68_cast_fp16)[name = string("input_79_cast_fp16")]; tensor model_model_layers_9_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_9_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(619512128)))]; tensor linear_69_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_9_mlp_down_proj_weight_to_fp16, x = input_79_cast_fp16)[name = string("linear_69_cast_fp16")]; tensor hidden_states_459_cast_fp16 = add(x = hidden_states_449_cast_fp16, y = linear_69_cast_fp16)[name = string("hidden_states_459_cast_fp16")]; fp16 var_78_promoted_40_to_fp16 = const()[name = string("op_78_promoted_40_to_fp16"), val = fp16(0x1p+1)]; tensor var_2266_cast_fp16 = pow(x = hidden_states_459_cast_fp16, y = var_78_promoted_40_to_fp16)[name = string("op_2266_cast_fp16")]; tensor variance_81_axes_0 = const()[name = string("variance_81_axes_0"), val = tensor([-1])]; bool variance_81_keep_dims_0 = const()[name = string("variance_81_keep_dims_0"), val = bool(true)]; tensor variance_81_cast_fp16 = reduce_mean(axes = variance_81_axes_0, keep_dims = variance_81_keep_dims_0, x = var_2266_cast_fp16)[name = string("variance_81_cast_fp16")]; fp16 var_2269_to_fp16 = const()[name = string("op_2269_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2270_cast_fp16 = add(x = variance_81_cast_fp16, y = var_2269_to_fp16)[name = string("op_2270_cast_fp16")]; fp32 var_2271_epsilon_0 = const()[name = string("op_2271_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2271_cast_fp16 = rsqrt(epsilon = var_2271_epsilon_0, x = var_2270_cast_fp16)[name = string("op_2271_cast_fp16")]; tensor hidden_states_463_cast_fp16 = mul(x = hidden_states_459_cast_fp16, y = var_2271_cast_fp16)[name = string("hidden_states_463_cast_fp16")]; tensor model_model_layers_10_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(625803648)))]; tensor hidden_states_467_cast_fp16 = mul(x = model_model_layers_10_input_layernorm_weight_to_fp16, y = hidden_states_463_cast_fp16)[name = string("hidden_states_467_cast_fp16")]; tensor var_2284_shape_cast_fp16 = shape(x = hidden_states_467_cast_fp16)[name = string("op_2284_shape_cast_fp16")]; int32 gather_184 = const()[name = string("gather_184"), val = int32(1)]; int32 gather_185_axis_0 = const()[name = string("gather_185_axis_0"), val = int32(0)]; int32 gather_185_batch_dims_0 = const()[name = string("gather_185_batch_dims_0"), val = int32(0)]; bool gather_185_validate_indices_0 = const()[name = string("gather_185_validate_indices_0"), val = bool(false)]; string var_2284_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2284_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_185_to_uint16 = const()[name = string("select_185_to_uint16"), val = uint16(1)]; tensor var_2284_shape_cast_fp16_to_uint16 = cast(dtype = var_2284_shape_cast_fp16_to_uint16_dtype_0, x = var_2284_shape_cast_fp16)[name = string("cast_658")]; uint16 gather_185_cast_uint16 = gather(axis = gather_185_axis_0, batch_dims = gather_185_batch_dims_0, indices = select_185_to_uint16, validate_indices = gather_185_validate_indices_0, x = var_2284_shape_cast_fp16_to_uint16)[name = string("gather_185_cast_uint16")]; string gather_185_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_185_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_10_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_10_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(625805760)))]; tensor linear_70_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_10_self_attn_q_proj_weight_to_fp16, x = hidden_states_467_cast_fp16)[name = string("linear_70_cast_fp16")]; tensor concat_191x = const()[name = string("concat_191x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_469_cast_fp16 = reshape(shape = concat_191x, x = linear_70_cast_fp16)[name = string("hidden_states_469_cast_fp16")]; fp16 var_78_promoted_41_to_fp16 = const()[name = string("op_78_promoted_41_to_fp16"), val = fp16(0x1p+1)]; tensor var_2292_cast_fp16 = pow(x = hidden_states_469_cast_fp16, y = var_78_promoted_41_to_fp16)[name = string("op_2292_cast_fp16")]; tensor variance_83_axes_0 = const()[name = string("variance_83_axes_0"), val = tensor([-1])]; bool variance_83_keep_dims_0 = const()[name = string("variance_83_keep_dims_0"), val = bool(true)]; tensor variance_83_cast_fp16 = reduce_mean(axes = variance_83_axes_0, keep_dims = variance_83_keep_dims_0, x = var_2292_cast_fp16)[name = string("variance_83_cast_fp16")]; fp16 var_2295_to_fp16 = const()[name = string("op_2295_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2296_cast_fp16 = add(x = variance_83_cast_fp16, y = var_2295_to_fp16)[name = string("op_2296_cast_fp16")]; fp32 var_2297_epsilon_0 = const()[name = string("op_2297_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2297_cast_fp16 = rsqrt(epsilon = var_2297_epsilon_0, x = var_2296_cast_fp16)[name = string("op_2297_cast_fp16")]; tensor hidden_states_473_cast_fp16 = mul(x = hidden_states_469_cast_fp16, y = var_2297_cast_fp16)[name = string("hidden_states_473_cast_fp16")]; tensor model_model_layers_10_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_10_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(630000128)))]; tensor var_2300_cast_fp16 = mul(x = model_model_layers_10_self_attn_q_norm_weight_to_fp16, y = hidden_states_473_cast_fp16)[name = string("op_2300_cast_fp16")]; tensor q_21_perm_0 = const()[name = string("q_21_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_10_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_10_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(630000448)))]; tensor linear_71_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_10_self_attn_k_proj_weight_to_fp16, x = hidden_states_467_cast_fp16)[name = string("linear_71_cast_fp16")]; tensor concat_192x = const()[name = string("concat_192x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_477_cast_fp16 = reshape(shape = concat_192x, x = linear_71_cast_fp16)[name = string("hidden_states_477_cast_fp16")]; fp16 var_78_promoted_42_to_fp16 = const()[name = string("op_78_promoted_42_to_fp16"), val = fp16(0x1p+1)]; tensor var_2308_cast_fp16 = pow(x = hidden_states_477_cast_fp16, y = var_78_promoted_42_to_fp16)[name = string("op_2308_cast_fp16")]; tensor variance_85_axes_0 = const()[name = string("variance_85_axes_0"), val = tensor([-1])]; bool variance_85_keep_dims_0 = const()[name = string("variance_85_keep_dims_0"), val = bool(true)]; tensor variance_85_cast_fp16 = reduce_mean(axes = variance_85_axes_0, keep_dims = variance_85_keep_dims_0, x = var_2308_cast_fp16)[name = string("variance_85_cast_fp16")]; fp16 var_2311_to_fp16 = const()[name = string("op_2311_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2312_cast_fp16 = add(x = variance_85_cast_fp16, y = var_2311_to_fp16)[name = string("op_2312_cast_fp16")]; fp32 var_2313_epsilon_0 = const()[name = string("op_2313_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2313_cast_fp16 = rsqrt(epsilon = var_2313_epsilon_0, x = var_2312_cast_fp16)[name = string("op_2313_cast_fp16")]; tensor hidden_states_481_cast_fp16 = mul(x = hidden_states_477_cast_fp16, y = var_2313_cast_fp16)[name = string("hidden_states_481_cast_fp16")]; tensor model_model_layers_10_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_10_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(632097664)))]; tensor var_2316_cast_fp16 = mul(x = model_model_layers_10_self_attn_k_norm_weight_to_fp16, y = hidden_states_481_cast_fp16)[name = string("op_2316_cast_fp16")]; tensor k_21_perm_0 = const()[name = string("k_21_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_10_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_10_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(632097984)))]; tensor linear_72_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_10_self_attn_v_proj_weight_to_fp16, x = hidden_states_467_cast_fp16)[name = string("linear_72_cast_fp16")]; tensor concat_193x = const()[name = string("concat_193x"), val = tensor([1, -1, 8, 128])]; tensor var_2321_cast_fp16 = reshape(shape = concat_193x, x = linear_72_cast_fp16)[name = string("op_2321_cast_fp16")]; tensor v_state_21_perm_0 = const()[name = string("v_state_21_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_21_cast_fp16 = transpose(perm = q_21_perm_0, x = var_2300_cast_fp16)[name = string("transpose_71")]; tensor var_2325_cast_fp16 = mul(x = q_21_cast_fp16, y = cos_5_cast_fp16)[name = string("op_2325_cast_fp16")]; tensor x1_41_begin_0 = const()[name = string("x1_41_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_41_end_0 = const()[name = string("x1_41_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_41_end_mask_0 = const()[name = string("x1_41_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_41_cast_fp16 = slice_by_index(begin = x1_41_begin_0, end = x1_41_end_0, end_mask = x1_41_end_mask_0, x = q_21_cast_fp16)[name = string("x1_41_cast_fp16")]; tensor x2_41_begin_0 = const()[name = string("x2_41_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_41_end_0 = const()[name = string("x2_41_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_41_end_mask_0 = const()[name = string("x2_41_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_41_cast_fp16 = slice_by_index(begin = x2_41_begin_0, end = x2_41_end_0, end_mask = x2_41_end_mask_0, x = q_21_cast_fp16)[name = string("x2_41_cast_fp16")]; fp16 const_24_promoted_to_fp16 = const()[name = string("const_24_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2336_cast_fp16 = mul(x = x2_41_cast_fp16, y = const_24_promoted_to_fp16)[name = string("op_2336_cast_fp16")]; bool var_2338_interleave_0 = const()[name = string("op_2338_interleave_0"), val = bool(false)]; tensor var_2338_cast_fp16 = concat(axis = var_72, interleave = var_2338_interleave_0, values = (var_2336_cast_fp16, x1_41_cast_fp16))[name = string("op_2338_cast_fp16")]; tensor var_2339_cast_fp16 = mul(x = var_2338_cast_fp16, y = sin_5_cast_fp16)[name = string("op_2339_cast_fp16")]; tensor query_41_cast_fp16 = add(x = var_2325_cast_fp16, y = var_2339_cast_fp16)[name = string("query_41_cast_fp16")]; tensor k_21_cast_fp16 = transpose(perm = k_21_perm_0, x = var_2316_cast_fp16)[name = string("transpose_70")]; tensor var_2341_cast_fp16 = mul(x = k_21_cast_fp16, y = cos_5_cast_fp16)[name = string("op_2341_cast_fp16")]; tensor x1_43_begin_0 = const()[name = string("x1_43_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_43_end_0 = const()[name = string("x1_43_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_43_end_mask_0 = const()[name = string("x1_43_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_43_cast_fp16 = slice_by_index(begin = x1_43_begin_0, end = x1_43_end_0, end_mask = x1_43_end_mask_0, x = k_21_cast_fp16)[name = string("x1_43_cast_fp16")]; tensor x2_43_begin_0 = const()[name = string("x2_43_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_43_end_0 = const()[name = string("x2_43_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_43_end_mask_0 = const()[name = string("x2_43_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_43_cast_fp16 = slice_by_index(begin = x2_43_begin_0, end = x2_43_end_0, end_mask = x2_43_end_mask_0, x = k_21_cast_fp16)[name = string("x2_43_cast_fp16")]; fp16 const_25_promoted_to_fp16 = const()[name = string("const_25_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2352_cast_fp16 = mul(x = x2_43_cast_fp16, y = const_25_promoted_to_fp16)[name = string("op_2352_cast_fp16")]; bool var_2354_interleave_0 = const()[name = string("op_2354_interleave_0"), val = bool(false)]; tensor var_2354_cast_fp16 = concat(axis = var_72, interleave = var_2354_interleave_0, values = (var_2352_cast_fp16, x1_43_cast_fp16))[name = string("op_2354_cast_fp16")]; tensor var_2355_cast_fp16 = mul(x = var_2354_cast_fp16, y = sin_5_cast_fp16)[name = string("op_2355_cast_fp16")]; tensor k_state_21_cast_fp16 = add(x = var_2341_cast_fp16, y = var_2355_cast_fp16)[name = string("k_state_21_cast_fp16")]; tensor expand_dims_120 = const()[name = string("expand_dims_120"), val = tensor([0])]; tensor expand_dims_121 = const()[name = string("expand_dims_121"), val = tensor([0])]; tensor expand_dims_123 = const()[name = string("expand_dims_123"), val = tensor([0])]; tensor concat_196_values0_0 = const()[name = string("concat_196_values0_0"), val = tensor([10])]; int32 concat_196_axis_0 = const()[name = string("concat_196_axis_0"), val = int32(0)]; bool concat_196_interleave_0 = const()[name = string("concat_196_interleave_0"), val = bool(false)]; tensor concat_196 = concat(axis = concat_196_axis_0, interleave = concat_196_interleave_0, values = (concat_196_values0_0, expand_dims_120, expand_dims_121, expand_dims_2, expand_dims_123))[name = string("concat_196")]; tensor key_cache_internal_tensor_assign_11_stride_0 = const()[name = string("key_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_196, begin_mask = key_cache_internal_tensor_assign_11_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_11_squeeze_mask_0, stride = key_cache_internal_tensor_assign_11_stride_0, update = k_state_21_cast_fp16, x = coreml_update_state_74)[name = string("key_cache_internal_tensor_assign_11_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_11_cast_fp16, input = key_cache)[name = string("coreml_update_state_76_write_state")]; tensor coreml_update_state_76 = read_state(input = key_cache)[name = string("coreml_update_state_76")]; tensor value_cache_internal_tensor_assign_11_stride_0 = const()[name = string("value_cache_internal_tensor_assign_11_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_11_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_11_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_11_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_11_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_21_cast_fp16 = transpose(perm = v_state_21_perm_0, x = var_2321_cast_fp16)[name = string("transpose_69")]; tensor value_cache_internal_tensor_assign_11_cast_fp16 = slice_update(begin = concat_196, begin_mask = value_cache_internal_tensor_assign_11_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_11_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_11_squeeze_mask_0, stride = value_cache_internal_tensor_assign_11_stride_0, update = v_state_21_cast_fp16, x = coreml_update_state_75)[name = string("value_cache_internal_tensor_assign_11_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_11_cast_fp16, input = value_cache)[name = string("coreml_update_state_77_write_state")]; tensor coreml_update_state_77 = read_state(input = value_cache)[name = string("coreml_update_state_77")]; tensor var_2378_begin_0 = const()[name = string("op_2378_begin_0"), val = tensor([10, 0, 0, 0, 0])]; tensor var_2378_end_0 = const()[name = string("op_2378_end_0"), val = tensor([11, 1, 8, 2048, 128])]; tensor var_2378_end_mask_0 = const()[name = string("op_2378_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2378_squeeze_mask_0 = const()[name = string("op_2378_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2378_cast_fp16 = slice_by_index(begin = var_2378_begin_0, end = var_2378_end_0, end_mask = var_2378_end_mask_0, squeeze_mask = var_2378_squeeze_mask_0, x = coreml_update_state_76)[name = string("op_2378_cast_fp16")]; tensor var_2381_begin_0 = const()[name = string("op_2381_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2381_end_mask_0 = const()[name = string("op_2381_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2381_cast_fp16 = slice_by_index(begin = var_2381_begin_0, end = concat_12, end_mask = var_2381_end_mask_0, x = var_2378_cast_fp16)[name = string("op_2381_cast_fp16")]; tensor var_2383_begin_0 = const()[name = string("op_2383_begin_0"), val = tensor([10, 0, 0, 0, 0])]; tensor var_2383_end_0 = const()[name = string("op_2383_end_0"), val = tensor([11, 1, 8, 2048, 128])]; tensor var_2383_end_mask_0 = const()[name = string("op_2383_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2383_squeeze_mask_0 = const()[name = string("op_2383_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2383_cast_fp16 = slice_by_index(begin = var_2383_begin_0, end = var_2383_end_0, end_mask = var_2383_end_mask_0, squeeze_mask = var_2383_squeeze_mask_0, x = coreml_update_state_77)[name = string("op_2383_cast_fp16")]; tensor var_2386_begin_0 = const()[name = string("op_2386_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2386_end_mask_0 = const()[name = string("op_2386_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2386_cast_fp16 = slice_by_index(begin = var_2386_begin_0, end = concat_12, end_mask = var_2386_end_mask_0, x = var_2383_cast_fp16)[name = string("op_2386_cast_fp16")]; tensor var_2388_shape_cast_fp16 = shape(x = var_2381_cast_fp16)[name = string("op_2388_shape_cast_fp16")]; int32 gather_193 = const()[name = string("gather_193"), val = int32(1)]; int32 gather_194 = const()[name = string("gather_194"), val = int32(8)]; int32 gather_195_axis_0 = const()[name = string("gather_195_axis_0"), val = int32(0)]; int32 gather_195_batch_dims_0 = const()[name = string("gather_195_batch_dims_0"), val = int32(0)]; bool gather_195_validate_indices_0 = const()[name = string("gather_195_validate_indices_0"), val = bool(false)]; string var_2388_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2388_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_195_to_uint16 = const()[name = string("select_195_to_uint16"), val = uint16(2)]; tensor var_2388_shape_cast_fp16_to_uint16 = cast(dtype = var_2388_shape_cast_fp16_to_uint16_dtype_0, x = var_2388_shape_cast_fp16)[name = string("cast_656")]; uint16 gather_195_cast_uint16 = gather(axis = gather_195_axis_0, batch_dims = gather_195_batch_dims_0, indices = select_195_to_uint16, validate_indices = gather_195_validate_indices_0, x = var_2388_shape_cast_fp16_to_uint16)[name = string("gather_195_cast_uint16")]; string gather_195_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_195_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_196 = const()[name = string("gather_196"), val = int32(128)]; tensor var_2395_axes_0 = const()[name = string("op_2395_axes_0"), val = tensor([2])]; tensor var_2395_cast_fp16 = expand_dims(axes = var_2395_axes_0, x = var_2381_cast_fp16)[name = string("op_2395_cast_fp16")]; int32 concat_204_axis_0 = const()[name = string("concat_204_axis_0"), val = int32(0)]; bool concat_204_interleave_0 = const()[name = string("concat_204_interleave_0"), val = bool(false)]; int32 gather_195_cast_uint16_to_int32 = cast(dtype = gather_195_cast_uint16_to_int32_dtype_0, x = gather_195_cast_uint16)[name = string("cast_655")]; tensor concat_204 = concat(axis = concat_204_axis_0, interleave = concat_204_interleave_0, values = (gather_193, gather_194, var_78, gather_195_cast_uint16_to_int32, gather_196))[name = string("concat_204")]; tensor shape_217_cast_fp16 = shape(x = var_2395_cast_fp16)[name = string("shape_217_cast_fp16")]; tensor real_div_20 = real_div(x = concat_204, y = shape_217_cast_fp16)[name = string("real_div_20")]; tensor hidden_states_487_cast_fp16 = tile(reps = real_div_20, x = var_2395_cast_fp16)[name = string("hidden_states_487_cast_fp16")]; tensor concat_205x = const()[name = string("concat_205x"), val = tensor([1, 16, -1, 128])]; tensor key_41_cast_fp16 = reshape(shape = concat_205x, x = hidden_states_487_cast_fp16)[name = string("key_41_cast_fp16")]; tensor var_2405_shape_cast_fp16 = shape(x = var_2386_cast_fp16)[name = string("op_2405_shape_cast_fp16")]; int32 gather_197 = const()[name = string("gather_197"), val = int32(1)]; int32 gather_198 = const()[name = string("gather_198"), val = int32(8)]; int32 gather_199_axis_0 = const()[name = string("gather_199_axis_0"), val = int32(0)]; int32 gather_199_batch_dims_0 = const()[name = string("gather_199_batch_dims_0"), val = int32(0)]; bool gather_199_validate_indices_0 = const()[name = string("gather_199_validate_indices_0"), val = bool(false)]; string var_2405_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2405_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_199_to_uint16 = const()[name = string("select_199_to_uint16"), val = uint16(2)]; tensor var_2405_shape_cast_fp16_to_uint16 = cast(dtype = var_2405_shape_cast_fp16_to_uint16_dtype_0, x = var_2405_shape_cast_fp16)[name = string("cast_654")]; uint16 gather_199_cast_uint16 = gather(axis = gather_199_axis_0, batch_dims = gather_199_batch_dims_0, indices = select_199_to_uint16, validate_indices = gather_199_validate_indices_0, x = var_2405_shape_cast_fp16_to_uint16)[name = string("gather_199_cast_uint16")]; string gather_199_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_199_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_200 = const()[name = string("gather_200"), val = int32(128)]; tensor var_2412_axes_0 = const()[name = string("op_2412_axes_0"), val = tensor([2])]; tensor var_2412_cast_fp16 = expand_dims(axes = var_2412_axes_0, x = var_2386_cast_fp16)[name = string("op_2412_cast_fp16")]; int32 concat_206_axis_0 = const()[name = string("concat_206_axis_0"), val = int32(0)]; bool concat_206_interleave_0 = const()[name = string("concat_206_interleave_0"), val = bool(false)]; int32 gather_199_cast_uint16_to_int32 = cast(dtype = gather_199_cast_uint16_to_int32_dtype_0, x = gather_199_cast_uint16)[name = string("cast_653")]; tensor concat_206 = concat(axis = concat_206_axis_0, interleave = concat_206_interleave_0, values = (gather_197, gather_198, var_78, gather_199_cast_uint16_to_int32, gather_200))[name = string("concat_206")]; tensor shape_222_cast_fp16 = shape(x = var_2412_cast_fp16)[name = string("shape_222_cast_fp16")]; tensor real_div_21 = real_div(x = concat_206, y = shape_222_cast_fp16)[name = string("real_div_21")]; tensor hidden_states_491_cast_fp16 = tile(reps = real_div_21, x = var_2412_cast_fp16)[name = string("hidden_states_491_cast_fp16")]; tensor concat_207x = const()[name = string("concat_207x"), val = tensor([1, 16, -1, 128])]; tensor value_41_cast_fp16 = reshape(shape = concat_207x, x = hidden_states_491_cast_fp16)[name = string("value_41_cast_fp16")]; tensor var_2422_shape_cast_fp16 = shape(x = key_41_cast_fp16)[name = string("op_2422_shape_cast_fp16")]; int32 gather_201_axis_0 = const()[name = string("gather_201_axis_0"), val = int32(0)]; int32 gather_201_batch_dims_0 = const()[name = string("gather_201_batch_dims_0"), val = int32(0)]; bool gather_201_validate_indices_0 = const()[name = string("gather_201_validate_indices_0"), val = bool(false)]; string var_2422_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2422_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_201_to_uint16 = const()[name = string("select_201_to_uint16"), val = uint16(2)]; tensor var_2422_shape_cast_fp16_to_uint16 = cast(dtype = var_2422_shape_cast_fp16_to_uint16_dtype_0, x = var_2422_shape_cast_fp16)[name = string("cast_652")]; uint16 gather_201_cast_uint16 = gather(axis = gather_201_axis_0, batch_dims = gather_201_batch_dims_0, indices = select_201_to_uint16, validate_indices = gather_201_validate_indices_0, x = var_2422_shape_cast_fp16_to_uint16)[name = string("gather_201_cast_uint16")]; string gather_201_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_201_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_208_values0_0 = const()[name = string("concat_208_values0_0"), val = int32(1)]; int32 concat_208_values1_0 = const()[name = string("concat_208_values1_0"), val = int32(1)]; int32 concat_208_values2_0 = const()[name = string("concat_208_values2_0"), val = int32(0)]; int32 concat_208_axis_0 = const()[name = string("concat_208_axis_0"), val = int32(0)]; bool concat_208_interleave_0 = const()[name = string("concat_208_interleave_0"), val = bool(false)]; int32 gather_201_cast_uint16_to_int32 = cast(dtype = gather_201_cast_uint16_to_int32_dtype_0, x = gather_201_cast_uint16)[name = string("cast_651")]; tensor concat_208 = concat(axis = concat_208_axis_0, interleave = concat_208_interleave_0, values = (concat_208_values0_0, concat_208_values1_0, concat_208_values2_0, gather_201_cast_uint16_to_int32))[name = string("concat_208")]; tensor attention_mask_21_begin_0 = const()[name = string("attention_mask_21_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_21_end_mask_0 = const()[name = string("attention_mask_21_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_21_cast_fp16 = slice_by_index(begin = attention_mask_21_begin_0, end = concat_208, end_mask = attention_mask_21_end_mask_0, x = causal_mask)[name = string("attention_mask_21_cast_fp16")]; tensor mul_10_cast_fp16 = mul(x = query_41_cast_fp16, y = var_85_to_fp16)[name = string("mul_10_cast_fp16")]; bool matmul_10_transpose_y_0 = const()[name = string("matmul_10_transpose_y_0"), val = bool(true)]; bool matmul_10_transpose_x_0 = const()[name = string("matmul_10_transpose_x_0"), val = bool(false)]; tensor matmul_10_cast_fp16 = matmul(transpose_x = matmul_10_transpose_x_0, transpose_y = matmul_10_transpose_y_0, x = mul_10_cast_fp16, y = key_41_cast_fp16)[name = string("matmul_10_cast_fp16")]; tensor add_212_cast_fp16 = add(x = matmul_10_cast_fp16, y = attention_mask_21_cast_fp16)[name = string("add_212_cast_fp16")]; int32 softmax_10_axis_0 = const()[name = string("softmax_10_axis_0"), val = int32(-1)]; tensor softmax_10_cast_fp16 = softmax(axis = softmax_10_axis_0, x = add_212_cast_fp16)[name = string("softmax_10_cast_fp16")]; bool attn_output_41_transpose_x_0 = const()[name = string("attn_output_41_transpose_x_0"), val = bool(false)]; bool attn_output_41_transpose_y_0 = const()[name = string("attn_output_41_transpose_y_0"), val = bool(false)]; tensor attn_output_41_cast_fp16 = matmul(transpose_x = attn_output_41_transpose_x_0, transpose_y = attn_output_41_transpose_y_0, x = softmax_10_cast_fp16, y = value_41_cast_fp16)[name = string("attn_output_41_cast_fp16")]; tensor var_2431_perm_0 = const()[name = string("op_2431_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_209_axis_0 = const()[name = string("concat_209_axis_0"), val = int32(0)]; bool concat_209_interleave_0 = const()[name = string("concat_209_interleave_0"), val = bool(false)]; int32 gather_185_cast_uint16_to_int32 = cast(dtype = gather_185_cast_uint16_to_int32_dtype_0, x = gather_185_cast_uint16)[name = string("cast_657")]; tensor concat_209 = concat(axis = concat_209_axis_0, interleave = concat_209_interleave_0, values = (gather_184, gather_185_cast_uint16_to_int32, var_72))[name = string("concat_209")]; tensor var_2431_cast_fp16 = transpose(perm = var_2431_perm_0, x = attn_output_41_cast_fp16)[name = string("transpose_68")]; tensor var_2434_cast_fp16 = reshape(shape = concat_209, x = var_2431_cast_fp16)[name = string("op_2434_cast_fp16")]; tensor model_model_layers_10_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_10_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(634195200)))]; tensor linear_73_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_10_self_attn_o_proj_weight_to_fp16, x = var_2434_cast_fp16)[name = string("linear_73_cast_fp16")]; tensor hidden_states_495_cast_fp16 = add(x = hidden_states_459_cast_fp16, y = linear_73_cast_fp16)[name = string("hidden_states_495_cast_fp16")]; fp16 var_78_promoted_43_to_fp16 = const()[name = string("op_78_promoted_43_to_fp16"), val = fp16(0x1p+1)]; tensor var_2441_cast_fp16 = pow(x = hidden_states_495_cast_fp16, y = var_78_promoted_43_to_fp16)[name = string("op_2441_cast_fp16")]; tensor variance_87_axes_0 = const()[name = string("variance_87_axes_0"), val = tensor([-1])]; bool variance_87_keep_dims_0 = const()[name = string("variance_87_keep_dims_0"), val = bool(true)]; tensor variance_87_cast_fp16 = reduce_mean(axes = variance_87_axes_0, keep_dims = variance_87_keep_dims_0, x = var_2441_cast_fp16)[name = string("variance_87_cast_fp16")]; fp16 var_2444_to_fp16 = const()[name = string("op_2444_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2445_cast_fp16 = add(x = variance_87_cast_fp16, y = var_2444_to_fp16)[name = string("op_2445_cast_fp16")]; fp32 var_2446_epsilon_0 = const()[name = string("op_2446_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2446_cast_fp16 = rsqrt(epsilon = var_2446_epsilon_0, x = var_2445_cast_fp16)[name = string("op_2446_cast_fp16")]; tensor hidden_states_499_cast_fp16 = mul(x = hidden_states_495_cast_fp16, y = var_2446_cast_fp16)[name = string("hidden_states_499_cast_fp16")]; tensor model_model_layers_10_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_10_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(638389568)))]; tensor input_83_cast_fp16 = mul(x = model_model_layers_10_post_attention_layernorm_weight_to_fp16, y = hidden_states_499_cast_fp16)[name = string("input_83_cast_fp16")]; tensor model_model_layers_10_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_10_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(638391680)))]; tensor linear_74_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_10_mlp_gate_proj_weight_to_fp16, x = input_83_cast_fp16)[name = string("linear_74_cast_fp16")]; tensor var_2458_cast_fp16 = silu(x = linear_74_cast_fp16)[name = string("op_2458_cast_fp16")]; tensor model_model_layers_10_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_10_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(644683200)))]; tensor linear_75_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_10_mlp_up_proj_weight_to_fp16, x = input_83_cast_fp16)[name = string("linear_75_cast_fp16")]; tensor input_87_cast_fp16 = mul(x = var_2458_cast_fp16, y = linear_75_cast_fp16)[name = string("input_87_cast_fp16")]; tensor model_model_layers_10_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_10_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(650974720)))]; tensor linear_76_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_10_mlp_down_proj_weight_to_fp16, x = input_87_cast_fp16)[name = string("linear_76_cast_fp16")]; tensor hidden_states_505_cast_fp16 = add(x = hidden_states_495_cast_fp16, y = linear_76_cast_fp16)[name = string("hidden_states_505_cast_fp16")]; fp16 var_78_promoted_44_to_fp16 = const()[name = string("op_78_promoted_44_to_fp16"), val = fp16(0x1p+1)]; tensor var_2471_cast_fp16 = pow(x = hidden_states_505_cast_fp16, y = var_78_promoted_44_to_fp16)[name = string("op_2471_cast_fp16")]; tensor variance_89_axes_0 = const()[name = string("variance_89_axes_0"), val = tensor([-1])]; bool variance_89_keep_dims_0 = const()[name = string("variance_89_keep_dims_0"), val = bool(true)]; tensor variance_89_cast_fp16 = reduce_mean(axes = variance_89_axes_0, keep_dims = variance_89_keep_dims_0, x = var_2471_cast_fp16)[name = string("variance_89_cast_fp16")]; fp16 var_2474_to_fp16 = const()[name = string("op_2474_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2475_cast_fp16 = add(x = variance_89_cast_fp16, y = var_2474_to_fp16)[name = string("op_2475_cast_fp16")]; fp32 var_2476_epsilon_0 = const()[name = string("op_2476_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2476_cast_fp16 = rsqrt(epsilon = var_2476_epsilon_0, x = var_2475_cast_fp16)[name = string("op_2476_cast_fp16")]; tensor hidden_states_509_cast_fp16 = mul(x = hidden_states_505_cast_fp16, y = var_2476_cast_fp16)[name = string("hidden_states_509_cast_fp16")]; tensor model_model_layers_11_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(657266240)))]; tensor hidden_states_513_cast_fp16 = mul(x = model_model_layers_11_input_layernorm_weight_to_fp16, y = hidden_states_509_cast_fp16)[name = string("hidden_states_513_cast_fp16")]; tensor var_2489_shape_cast_fp16 = shape(x = hidden_states_513_cast_fp16)[name = string("op_2489_shape_cast_fp16")]; int32 gather_202 = const()[name = string("gather_202"), val = int32(1)]; int32 gather_203_axis_0 = const()[name = string("gather_203_axis_0"), val = int32(0)]; int32 gather_203_batch_dims_0 = const()[name = string("gather_203_batch_dims_0"), val = int32(0)]; bool gather_203_validate_indices_0 = const()[name = string("gather_203_validate_indices_0"), val = bool(false)]; string var_2489_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2489_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_203_to_uint16 = const()[name = string("select_203_to_uint16"), val = uint16(1)]; tensor var_2489_shape_cast_fp16_to_uint16 = cast(dtype = var_2489_shape_cast_fp16_to_uint16_dtype_0, x = var_2489_shape_cast_fp16)[name = string("cast_650")]; uint16 gather_203_cast_uint16 = gather(axis = gather_203_axis_0, batch_dims = gather_203_batch_dims_0, indices = select_203_to_uint16, validate_indices = gather_203_validate_indices_0, x = var_2489_shape_cast_fp16_to_uint16)[name = string("gather_203_cast_uint16")]; string gather_203_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_203_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_11_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_11_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(657268352)))]; tensor linear_77_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_11_self_attn_q_proj_weight_to_fp16, x = hidden_states_513_cast_fp16)[name = string("linear_77_cast_fp16")]; tensor concat_210x = const()[name = string("concat_210x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_515_cast_fp16 = reshape(shape = concat_210x, x = linear_77_cast_fp16)[name = string("hidden_states_515_cast_fp16")]; fp16 var_78_promoted_45_to_fp16 = const()[name = string("op_78_promoted_45_to_fp16"), val = fp16(0x1p+1)]; tensor var_2497_cast_fp16 = pow(x = hidden_states_515_cast_fp16, y = var_78_promoted_45_to_fp16)[name = string("op_2497_cast_fp16")]; tensor variance_91_axes_0 = const()[name = string("variance_91_axes_0"), val = tensor([-1])]; bool variance_91_keep_dims_0 = const()[name = string("variance_91_keep_dims_0"), val = bool(true)]; tensor variance_91_cast_fp16 = reduce_mean(axes = variance_91_axes_0, keep_dims = variance_91_keep_dims_0, x = var_2497_cast_fp16)[name = string("variance_91_cast_fp16")]; fp16 var_2500_to_fp16 = const()[name = string("op_2500_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2501_cast_fp16 = add(x = variance_91_cast_fp16, y = var_2500_to_fp16)[name = string("op_2501_cast_fp16")]; fp32 var_2502_epsilon_0 = const()[name = string("op_2502_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2502_cast_fp16 = rsqrt(epsilon = var_2502_epsilon_0, x = var_2501_cast_fp16)[name = string("op_2502_cast_fp16")]; tensor hidden_states_519_cast_fp16 = mul(x = hidden_states_515_cast_fp16, y = var_2502_cast_fp16)[name = string("hidden_states_519_cast_fp16")]; tensor model_model_layers_11_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_11_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(661462720)))]; tensor var_2505_cast_fp16 = mul(x = model_model_layers_11_self_attn_q_norm_weight_to_fp16, y = hidden_states_519_cast_fp16)[name = string("op_2505_cast_fp16")]; tensor q_23_perm_0 = const()[name = string("q_23_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_11_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_11_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(661463040)))]; tensor linear_78_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_11_self_attn_k_proj_weight_to_fp16, x = hidden_states_513_cast_fp16)[name = string("linear_78_cast_fp16")]; tensor concat_211x = const()[name = string("concat_211x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_523_cast_fp16 = reshape(shape = concat_211x, x = linear_78_cast_fp16)[name = string("hidden_states_523_cast_fp16")]; fp16 var_78_promoted_46_to_fp16 = const()[name = string("op_78_promoted_46_to_fp16"), val = fp16(0x1p+1)]; tensor var_2513_cast_fp16 = pow(x = hidden_states_523_cast_fp16, y = var_78_promoted_46_to_fp16)[name = string("op_2513_cast_fp16")]; tensor variance_93_axes_0 = const()[name = string("variance_93_axes_0"), val = tensor([-1])]; bool variance_93_keep_dims_0 = const()[name = string("variance_93_keep_dims_0"), val = bool(true)]; tensor variance_93_cast_fp16 = reduce_mean(axes = variance_93_axes_0, keep_dims = variance_93_keep_dims_0, x = var_2513_cast_fp16)[name = string("variance_93_cast_fp16")]; fp16 var_2516_to_fp16 = const()[name = string("op_2516_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2517_cast_fp16 = add(x = variance_93_cast_fp16, y = var_2516_to_fp16)[name = string("op_2517_cast_fp16")]; fp32 var_2518_epsilon_0 = const()[name = string("op_2518_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2518_cast_fp16 = rsqrt(epsilon = var_2518_epsilon_0, x = var_2517_cast_fp16)[name = string("op_2518_cast_fp16")]; tensor hidden_states_527_cast_fp16 = mul(x = hidden_states_523_cast_fp16, y = var_2518_cast_fp16)[name = string("hidden_states_527_cast_fp16")]; tensor model_model_layers_11_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_11_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(663560256)))]; tensor var_2521_cast_fp16 = mul(x = model_model_layers_11_self_attn_k_norm_weight_to_fp16, y = hidden_states_527_cast_fp16)[name = string("op_2521_cast_fp16")]; tensor k_23_perm_0 = const()[name = string("k_23_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_11_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_11_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(663560576)))]; tensor linear_79_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_11_self_attn_v_proj_weight_to_fp16, x = hidden_states_513_cast_fp16)[name = string("linear_79_cast_fp16")]; tensor concat_212x = const()[name = string("concat_212x"), val = tensor([1, -1, 8, 128])]; tensor var_2526_cast_fp16 = reshape(shape = concat_212x, x = linear_79_cast_fp16)[name = string("op_2526_cast_fp16")]; tensor v_state_23_perm_0 = const()[name = string("v_state_23_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_23_cast_fp16 = transpose(perm = q_23_perm_0, x = var_2505_cast_fp16)[name = string("transpose_67")]; tensor var_2530_cast_fp16 = mul(x = q_23_cast_fp16, y = cos_5_cast_fp16)[name = string("op_2530_cast_fp16")]; tensor x1_45_begin_0 = const()[name = string("x1_45_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_45_end_0 = const()[name = string("x1_45_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_45_end_mask_0 = const()[name = string("x1_45_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_45_cast_fp16 = slice_by_index(begin = x1_45_begin_0, end = x1_45_end_0, end_mask = x1_45_end_mask_0, x = q_23_cast_fp16)[name = string("x1_45_cast_fp16")]; tensor x2_45_begin_0 = const()[name = string("x2_45_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_45_end_0 = const()[name = string("x2_45_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_45_end_mask_0 = const()[name = string("x2_45_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_45_cast_fp16 = slice_by_index(begin = x2_45_begin_0, end = x2_45_end_0, end_mask = x2_45_end_mask_0, x = q_23_cast_fp16)[name = string("x2_45_cast_fp16")]; fp16 const_26_promoted_to_fp16 = const()[name = string("const_26_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2541_cast_fp16 = mul(x = x2_45_cast_fp16, y = const_26_promoted_to_fp16)[name = string("op_2541_cast_fp16")]; bool var_2543_interleave_0 = const()[name = string("op_2543_interleave_0"), val = bool(false)]; tensor var_2543_cast_fp16 = concat(axis = var_72, interleave = var_2543_interleave_0, values = (var_2541_cast_fp16, x1_45_cast_fp16))[name = string("op_2543_cast_fp16")]; tensor var_2544_cast_fp16 = mul(x = var_2543_cast_fp16, y = sin_5_cast_fp16)[name = string("op_2544_cast_fp16")]; tensor query_45_cast_fp16 = add(x = var_2530_cast_fp16, y = var_2544_cast_fp16)[name = string("query_45_cast_fp16")]; tensor k_23_cast_fp16 = transpose(perm = k_23_perm_0, x = var_2521_cast_fp16)[name = string("transpose_66")]; tensor var_2546_cast_fp16 = mul(x = k_23_cast_fp16, y = cos_5_cast_fp16)[name = string("op_2546_cast_fp16")]; tensor x1_47_begin_0 = const()[name = string("x1_47_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_47_end_0 = const()[name = string("x1_47_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_47_end_mask_0 = const()[name = string("x1_47_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_47_cast_fp16 = slice_by_index(begin = x1_47_begin_0, end = x1_47_end_0, end_mask = x1_47_end_mask_0, x = k_23_cast_fp16)[name = string("x1_47_cast_fp16")]; tensor x2_47_begin_0 = const()[name = string("x2_47_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_47_end_0 = const()[name = string("x2_47_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_47_end_mask_0 = const()[name = string("x2_47_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_47_cast_fp16 = slice_by_index(begin = x2_47_begin_0, end = x2_47_end_0, end_mask = x2_47_end_mask_0, x = k_23_cast_fp16)[name = string("x2_47_cast_fp16")]; fp16 const_27_promoted_to_fp16 = const()[name = string("const_27_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2557_cast_fp16 = mul(x = x2_47_cast_fp16, y = const_27_promoted_to_fp16)[name = string("op_2557_cast_fp16")]; bool var_2559_interleave_0 = const()[name = string("op_2559_interleave_0"), val = bool(false)]; tensor var_2559_cast_fp16 = concat(axis = var_72, interleave = var_2559_interleave_0, values = (var_2557_cast_fp16, x1_47_cast_fp16))[name = string("op_2559_cast_fp16")]; tensor var_2560_cast_fp16 = mul(x = var_2559_cast_fp16, y = sin_5_cast_fp16)[name = string("op_2560_cast_fp16")]; tensor k_state_23_cast_fp16 = add(x = var_2546_cast_fp16, y = var_2560_cast_fp16)[name = string("k_state_23_cast_fp16")]; tensor expand_dims_132 = const()[name = string("expand_dims_132"), val = tensor([0])]; tensor expand_dims_133 = const()[name = string("expand_dims_133"), val = tensor([0])]; tensor expand_dims_135 = const()[name = string("expand_dims_135"), val = tensor([0])]; tensor concat_215_values0_0 = const()[name = string("concat_215_values0_0"), val = tensor([11])]; int32 concat_215_axis_0 = const()[name = string("concat_215_axis_0"), val = int32(0)]; bool concat_215_interleave_0 = const()[name = string("concat_215_interleave_0"), val = bool(false)]; tensor concat_215 = concat(axis = concat_215_axis_0, interleave = concat_215_interleave_0, values = (concat_215_values0_0, expand_dims_132, expand_dims_133, expand_dims_2, expand_dims_135))[name = string("concat_215")]; tensor key_cache_internal_tensor_assign_12_stride_0 = const()[name = string("key_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_215, begin_mask = key_cache_internal_tensor_assign_12_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_12_squeeze_mask_0, stride = key_cache_internal_tensor_assign_12_stride_0, update = k_state_23_cast_fp16, x = coreml_update_state_76)[name = string("key_cache_internal_tensor_assign_12_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_12_cast_fp16, input = key_cache)[name = string("coreml_update_state_78_write_state")]; tensor coreml_update_state_78 = read_state(input = key_cache)[name = string("coreml_update_state_78")]; tensor value_cache_internal_tensor_assign_12_stride_0 = const()[name = string("value_cache_internal_tensor_assign_12_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_12_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_12_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_12_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_12_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_23_cast_fp16 = transpose(perm = v_state_23_perm_0, x = var_2526_cast_fp16)[name = string("transpose_65")]; tensor value_cache_internal_tensor_assign_12_cast_fp16 = slice_update(begin = concat_215, begin_mask = value_cache_internal_tensor_assign_12_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_12_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_12_squeeze_mask_0, stride = value_cache_internal_tensor_assign_12_stride_0, update = v_state_23_cast_fp16, x = coreml_update_state_77)[name = string("value_cache_internal_tensor_assign_12_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_12_cast_fp16, input = value_cache)[name = string("coreml_update_state_79_write_state")]; tensor coreml_update_state_79 = read_state(input = value_cache)[name = string("coreml_update_state_79")]; tensor var_2583_begin_0 = const()[name = string("op_2583_begin_0"), val = tensor([11, 0, 0, 0, 0])]; tensor var_2583_end_0 = const()[name = string("op_2583_end_0"), val = tensor([12, 1, 8, 2048, 128])]; tensor var_2583_end_mask_0 = const()[name = string("op_2583_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2583_squeeze_mask_0 = const()[name = string("op_2583_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2583_cast_fp16 = slice_by_index(begin = var_2583_begin_0, end = var_2583_end_0, end_mask = var_2583_end_mask_0, squeeze_mask = var_2583_squeeze_mask_0, x = coreml_update_state_78)[name = string("op_2583_cast_fp16")]; tensor var_2586_begin_0 = const()[name = string("op_2586_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2586_end_mask_0 = const()[name = string("op_2586_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2586_cast_fp16 = slice_by_index(begin = var_2586_begin_0, end = concat_12, end_mask = var_2586_end_mask_0, x = var_2583_cast_fp16)[name = string("op_2586_cast_fp16")]; tensor var_2588_begin_0 = const()[name = string("op_2588_begin_0"), val = tensor([11, 0, 0, 0, 0])]; tensor var_2588_end_0 = const()[name = string("op_2588_end_0"), val = tensor([12, 1, 8, 2048, 128])]; tensor var_2588_end_mask_0 = const()[name = string("op_2588_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2588_squeeze_mask_0 = const()[name = string("op_2588_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2588_cast_fp16 = slice_by_index(begin = var_2588_begin_0, end = var_2588_end_0, end_mask = var_2588_end_mask_0, squeeze_mask = var_2588_squeeze_mask_0, x = coreml_update_state_79)[name = string("op_2588_cast_fp16")]; tensor var_2591_begin_0 = const()[name = string("op_2591_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2591_end_mask_0 = const()[name = string("op_2591_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2591_cast_fp16 = slice_by_index(begin = var_2591_begin_0, end = concat_12, end_mask = var_2591_end_mask_0, x = var_2588_cast_fp16)[name = string("op_2591_cast_fp16")]; tensor var_2593_shape_cast_fp16 = shape(x = var_2586_cast_fp16)[name = string("op_2593_shape_cast_fp16")]; int32 gather_211 = const()[name = string("gather_211"), val = int32(1)]; int32 gather_212 = const()[name = string("gather_212"), val = int32(8)]; int32 gather_213_axis_0 = const()[name = string("gather_213_axis_0"), val = int32(0)]; int32 gather_213_batch_dims_0 = const()[name = string("gather_213_batch_dims_0"), val = int32(0)]; bool gather_213_validate_indices_0 = const()[name = string("gather_213_validate_indices_0"), val = bool(false)]; string var_2593_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2593_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_213_to_uint16 = const()[name = string("select_213_to_uint16"), val = uint16(2)]; tensor var_2593_shape_cast_fp16_to_uint16 = cast(dtype = var_2593_shape_cast_fp16_to_uint16_dtype_0, x = var_2593_shape_cast_fp16)[name = string("cast_648")]; uint16 gather_213_cast_uint16 = gather(axis = gather_213_axis_0, batch_dims = gather_213_batch_dims_0, indices = select_213_to_uint16, validate_indices = gather_213_validate_indices_0, x = var_2593_shape_cast_fp16_to_uint16)[name = string("gather_213_cast_uint16")]; string gather_213_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_213_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_214 = const()[name = string("gather_214"), val = int32(128)]; tensor var_2600_axes_0 = const()[name = string("op_2600_axes_0"), val = tensor([2])]; tensor var_2600_cast_fp16 = expand_dims(axes = var_2600_axes_0, x = var_2586_cast_fp16)[name = string("op_2600_cast_fp16")]; int32 concat_223_axis_0 = const()[name = string("concat_223_axis_0"), val = int32(0)]; bool concat_223_interleave_0 = const()[name = string("concat_223_interleave_0"), val = bool(false)]; int32 gather_213_cast_uint16_to_int32 = cast(dtype = gather_213_cast_uint16_to_int32_dtype_0, x = gather_213_cast_uint16)[name = string("cast_647")]; tensor concat_223 = concat(axis = concat_223_axis_0, interleave = concat_223_interleave_0, values = (gather_211, gather_212, var_78, gather_213_cast_uint16_to_int32, gather_214))[name = string("concat_223")]; tensor shape_237_cast_fp16 = shape(x = var_2600_cast_fp16)[name = string("shape_237_cast_fp16")]; tensor real_div_22 = real_div(x = concat_223, y = shape_237_cast_fp16)[name = string("real_div_22")]; tensor hidden_states_533_cast_fp16 = tile(reps = real_div_22, x = var_2600_cast_fp16)[name = string("hidden_states_533_cast_fp16")]; tensor concat_224x = const()[name = string("concat_224x"), val = tensor([1, 16, -1, 128])]; tensor key_45_cast_fp16 = reshape(shape = concat_224x, x = hidden_states_533_cast_fp16)[name = string("key_45_cast_fp16")]; tensor var_2610_shape_cast_fp16 = shape(x = var_2591_cast_fp16)[name = string("op_2610_shape_cast_fp16")]; int32 gather_215 = const()[name = string("gather_215"), val = int32(1)]; int32 gather_216 = const()[name = string("gather_216"), val = int32(8)]; int32 gather_217_axis_0 = const()[name = string("gather_217_axis_0"), val = int32(0)]; int32 gather_217_batch_dims_0 = const()[name = string("gather_217_batch_dims_0"), val = int32(0)]; bool gather_217_validate_indices_0 = const()[name = string("gather_217_validate_indices_0"), val = bool(false)]; string var_2610_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2610_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_217_to_uint16 = const()[name = string("select_217_to_uint16"), val = uint16(2)]; tensor var_2610_shape_cast_fp16_to_uint16 = cast(dtype = var_2610_shape_cast_fp16_to_uint16_dtype_0, x = var_2610_shape_cast_fp16)[name = string("cast_646")]; uint16 gather_217_cast_uint16 = gather(axis = gather_217_axis_0, batch_dims = gather_217_batch_dims_0, indices = select_217_to_uint16, validate_indices = gather_217_validate_indices_0, x = var_2610_shape_cast_fp16_to_uint16)[name = string("gather_217_cast_uint16")]; string gather_217_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_217_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_218 = const()[name = string("gather_218"), val = int32(128)]; tensor var_2617_axes_0 = const()[name = string("op_2617_axes_0"), val = tensor([2])]; tensor var_2617_cast_fp16 = expand_dims(axes = var_2617_axes_0, x = var_2591_cast_fp16)[name = string("op_2617_cast_fp16")]; int32 concat_225_axis_0 = const()[name = string("concat_225_axis_0"), val = int32(0)]; bool concat_225_interleave_0 = const()[name = string("concat_225_interleave_0"), val = bool(false)]; int32 gather_217_cast_uint16_to_int32 = cast(dtype = gather_217_cast_uint16_to_int32_dtype_0, x = gather_217_cast_uint16)[name = string("cast_645")]; tensor concat_225 = concat(axis = concat_225_axis_0, interleave = concat_225_interleave_0, values = (gather_215, gather_216, var_78, gather_217_cast_uint16_to_int32, gather_218))[name = string("concat_225")]; tensor shape_242_cast_fp16 = shape(x = var_2617_cast_fp16)[name = string("shape_242_cast_fp16")]; tensor real_div_23 = real_div(x = concat_225, y = shape_242_cast_fp16)[name = string("real_div_23")]; tensor hidden_states_537_cast_fp16 = tile(reps = real_div_23, x = var_2617_cast_fp16)[name = string("hidden_states_537_cast_fp16")]; tensor concat_226x = const()[name = string("concat_226x"), val = tensor([1, 16, -1, 128])]; tensor value_45_cast_fp16 = reshape(shape = concat_226x, x = hidden_states_537_cast_fp16)[name = string("value_45_cast_fp16")]; tensor var_2627_shape_cast_fp16 = shape(x = key_45_cast_fp16)[name = string("op_2627_shape_cast_fp16")]; int32 gather_219_axis_0 = const()[name = string("gather_219_axis_0"), val = int32(0)]; int32 gather_219_batch_dims_0 = const()[name = string("gather_219_batch_dims_0"), val = int32(0)]; bool gather_219_validate_indices_0 = const()[name = string("gather_219_validate_indices_0"), val = bool(false)]; string var_2627_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2627_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_219_to_uint16 = const()[name = string("select_219_to_uint16"), val = uint16(2)]; tensor var_2627_shape_cast_fp16_to_uint16 = cast(dtype = var_2627_shape_cast_fp16_to_uint16_dtype_0, x = var_2627_shape_cast_fp16)[name = string("cast_644")]; uint16 gather_219_cast_uint16 = gather(axis = gather_219_axis_0, batch_dims = gather_219_batch_dims_0, indices = select_219_to_uint16, validate_indices = gather_219_validate_indices_0, x = var_2627_shape_cast_fp16_to_uint16)[name = string("gather_219_cast_uint16")]; string gather_219_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_219_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_227_values0_0 = const()[name = string("concat_227_values0_0"), val = int32(1)]; int32 concat_227_values1_0 = const()[name = string("concat_227_values1_0"), val = int32(1)]; int32 concat_227_values2_0 = const()[name = string("concat_227_values2_0"), val = int32(0)]; int32 concat_227_axis_0 = const()[name = string("concat_227_axis_0"), val = int32(0)]; bool concat_227_interleave_0 = const()[name = string("concat_227_interleave_0"), val = bool(false)]; int32 gather_219_cast_uint16_to_int32 = cast(dtype = gather_219_cast_uint16_to_int32_dtype_0, x = gather_219_cast_uint16)[name = string("cast_643")]; tensor concat_227 = concat(axis = concat_227_axis_0, interleave = concat_227_interleave_0, values = (concat_227_values0_0, concat_227_values1_0, concat_227_values2_0, gather_219_cast_uint16_to_int32))[name = string("concat_227")]; tensor attention_mask_23_begin_0 = const()[name = string("attention_mask_23_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_23_end_mask_0 = const()[name = string("attention_mask_23_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_23_cast_fp16 = slice_by_index(begin = attention_mask_23_begin_0, end = concat_227, end_mask = attention_mask_23_end_mask_0, x = causal_mask)[name = string("attention_mask_23_cast_fp16")]; tensor mul_11_cast_fp16 = mul(x = query_45_cast_fp16, y = var_85_to_fp16)[name = string("mul_11_cast_fp16")]; bool matmul_11_transpose_y_0 = const()[name = string("matmul_11_transpose_y_0"), val = bool(true)]; bool matmul_11_transpose_x_0 = const()[name = string("matmul_11_transpose_x_0"), val = bool(false)]; tensor matmul_11_cast_fp16 = matmul(transpose_x = matmul_11_transpose_x_0, transpose_y = matmul_11_transpose_y_0, x = mul_11_cast_fp16, y = key_45_cast_fp16)[name = string("matmul_11_cast_fp16")]; tensor add_231_cast_fp16 = add(x = matmul_11_cast_fp16, y = attention_mask_23_cast_fp16)[name = string("add_231_cast_fp16")]; int32 softmax_11_axis_0 = const()[name = string("softmax_11_axis_0"), val = int32(-1)]; tensor softmax_11_cast_fp16 = softmax(axis = softmax_11_axis_0, x = add_231_cast_fp16)[name = string("softmax_11_cast_fp16")]; bool attn_output_45_transpose_x_0 = const()[name = string("attn_output_45_transpose_x_0"), val = bool(false)]; bool attn_output_45_transpose_y_0 = const()[name = string("attn_output_45_transpose_y_0"), val = bool(false)]; tensor attn_output_45_cast_fp16 = matmul(transpose_x = attn_output_45_transpose_x_0, transpose_y = attn_output_45_transpose_y_0, x = softmax_11_cast_fp16, y = value_45_cast_fp16)[name = string("attn_output_45_cast_fp16")]; tensor var_2636_perm_0 = const()[name = string("op_2636_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_228_axis_0 = const()[name = string("concat_228_axis_0"), val = int32(0)]; bool concat_228_interleave_0 = const()[name = string("concat_228_interleave_0"), val = bool(false)]; int32 gather_203_cast_uint16_to_int32 = cast(dtype = gather_203_cast_uint16_to_int32_dtype_0, x = gather_203_cast_uint16)[name = string("cast_649")]; tensor concat_228 = concat(axis = concat_228_axis_0, interleave = concat_228_interleave_0, values = (gather_202, gather_203_cast_uint16_to_int32, var_72))[name = string("concat_228")]; tensor var_2636_cast_fp16 = transpose(perm = var_2636_perm_0, x = attn_output_45_cast_fp16)[name = string("transpose_64")]; tensor var_2639_cast_fp16 = reshape(shape = concat_228, x = var_2636_cast_fp16)[name = string("op_2639_cast_fp16")]; tensor model_model_layers_11_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_11_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(665657792)))]; tensor linear_80_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_11_self_attn_o_proj_weight_to_fp16, x = var_2639_cast_fp16)[name = string("linear_80_cast_fp16")]; tensor hidden_states_541_cast_fp16 = add(x = hidden_states_505_cast_fp16, y = linear_80_cast_fp16)[name = string("hidden_states_541_cast_fp16")]; fp16 var_78_promoted_47_to_fp16 = const()[name = string("op_78_promoted_47_to_fp16"), val = fp16(0x1p+1)]; tensor var_2646_cast_fp16 = pow(x = hidden_states_541_cast_fp16, y = var_78_promoted_47_to_fp16)[name = string("op_2646_cast_fp16")]; tensor variance_95_axes_0 = const()[name = string("variance_95_axes_0"), val = tensor([-1])]; bool variance_95_keep_dims_0 = const()[name = string("variance_95_keep_dims_0"), val = bool(true)]; tensor variance_95_cast_fp16 = reduce_mean(axes = variance_95_axes_0, keep_dims = variance_95_keep_dims_0, x = var_2646_cast_fp16)[name = string("variance_95_cast_fp16")]; fp16 var_2649_to_fp16 = const()[name = string("op_2649_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2650_cast_fp16 = add(x = variance_95_cast_fp16, y = var_2649_to_fp16)[name = string("op_2650_cast_fp16")]; fp32 var_2651_epsilon_0 = const()[name = string("op_2651_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2651_cast_fp16 = rsqrt(epsilon = var_2651_epsilon_0, x = var_2650_cast_fp16)[name = string("op_2651_cast_fp16")]; tensor hidden_states_545_cast_fp16 = mul(x = hidden_states_541_cast_fp16, y = var_2651_cast_fp16)[name = string("hidden_states_545_cast_fp16")]; tensor model_model_layers_11_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_11_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(669852160)))]; tensor input_91_cast_fp16 = mul(x = model_model_layers_11_post_attention_layernorm_weight_to_fp16, y = hidden_states_545_cast_fp16)[name = string("input_91_cast_fp16")]; tensor model_model_layers_11_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_11_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(669854272)))]; tensor linear_81_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_11_mlp_gate_proj_weight_to_fp16, x = input_91_cast_fp16)[name = string("linear_81_cast_fp16")]; tensor var_2663_cast_fp16 = silu(x = linear_81_cast_fp16)[name = string("op_2663_cast_fp16")]; tensor model_model_layers_11_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_11_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(676145792)))]; tensor linear_82_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_11_mlp_up_proj_weight_to_fp16, x = input_91_cast_fp16)[name = string("linear_82_cast_fp16")]; tensor input_95_cast_fp16 = mul(x = var_2663_cast_fp16, y = linear_82_cast_fp16)[name = string("input_95_cast_fp16")]; tensor model_model_layers_11_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_11_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(682437312)))]; tensor linear_83_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_11_mlp_down_proj_weight_to_fp16, x = input_95_cast_fp16)[name = string("linear_83_cast_fp16")]; tensor hidden_states_551_cast_fp16 = add(x = hidden_states_541_cast_fp16, y = linear_83_cast_fp16)[name = string("hidden_states_551_cast_fp16")]; fp16 var_78_promoted_48_to_fp16 = const()[name = string("op_78_promoted_48_to_fp16"), val = fp16(0x1p+1)]; tensor var_2676_cast_fp16 = pow(x = hidden_states_551_cast_fp16, y = var_78_promoted_48_to_fp16)[name = string("op_2676_cast_fp16")]; tensor variance_97_axes_0 = const()[name = string("variance_97_axes_0"), val = tensor([-1])]; bool variance_97_keep_dims_0 = const()[name = string("variance_97_keep_dims_0"), val = bool(true)]; tensor variance_97_cast_fp16 = reduce_mean(axes = variance_97_axes_0, keep_dims = variance_97_keep_dims_0, x = var_2676_cast_fp16)[name = string("variance_97_cast_fp16")]; fp16 var_2679_to_fp16 = const()[name = string("op_2679_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2680_cast_fp16 = add(x = variance_97_cast_fp16, y = var_2679_to_fp16)[name = string("op_2680_cast_fp16")]; fp32 var_2681_epsilon_0 = const()[name = string("op_2681_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2681_cast_fp16 = rsqrt(epsilon = var_2681_epsilon_0, x = var_2680_cast_fp16)[name = string("op_2681_cast_fp16")]; tensor hidden_states_555_cast_fp16 = mul(x = hidden_states_551_cast_fp16, y = var_2681_cast_fp16)[name = string("hidden_states_555_cast_fp16")]; tensor model_model_layers_12_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(688728832)))]; tensor hidden_states_559_cast_fp16 = mul(x = model_model_layers_12_input_layernorm_weight_to_fp16, y = hidden_states_555_cast_fp16)[name = string("hidden_states_559_cast_fp16")]; tensor var_2694_shape_cast_fp16 = shape(x = hidden_states_559_cast_fp16)[name = string("op_2694_shape_cast_fp16")]; int32 gather_220 = const()[name = string("gather_220"), val = int32(1)]; int32 gather_221_axis_0 = const()[name = string("gather_221_axis_0"), val = int32(0)]; int32 gather_221_batch_dims_0 = const()[name = string("gather_221_batch_dims_0"), val = int32(0)]; bool gather_221_validate_indices_0 = const()[name = string("gather_221_validate_indices_0"), val = bool(false)]; string var_2694_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2694_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_221_to_uint16 = const()[name = string("select_221_to_uint16"), val = uint16(1)]; tensor var_2694_shape_cast_fp16_to_uint16 = cast(dtype = var_2694_shape_cast_fp16_to_uint16_dtype_0, x = var_2694_shape_cast_fp16)[name = string("cast_642")]; uint16 gather_221_cast_uint16 = gather(axis = gather_221_axis_0, batch_dims = gather_221_batch_dims_0, indices = select_221_to_uint16, validate_indices = gather_221_validate_indices_0, x = var_2694_shape_cast_fp16_to_uint16)[name = string("gather_221_cast_uint16")]; string gather_221_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_221_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_12_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_12_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(688730944)))]; tensor linear_84_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_12_self_attn_q_proj_weight_to_fp16, x = hidden_states_559_cast_fp16)[name = string("linear_84_cast_fp16")]; tensor concat_229x = const()[name = string("concat_229x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_561_cast_fp16 = reshape(shape = concat_229x, x = linear_84_cast_fp16)[name = string("hidden_states_561_cast_fp16")]; fp16 var_78_promoted_49_to_fp16 = const()[name = string("op_78_promoted_49_to_fp16"), val = fp16(0x1p+1)]; tensor var_2702_cast_fp16 = pow(x = hidden_states_561_cast_fp16, y = var_78_promoted_49_to_fp16)[name = string("op_2702_cast_fp16")]; tensor variance_99_axes_0 = const()[name = string("variance_99_axes_0"), val = tensor([-1])]; bool variance_99_keep_dims_0 = const()[name = string("variance_99_keep_dims_0"), val = bool(true)]; tensor variance_99_cast_fp16 = reduce_mean(axes = variance_99_axes_0, keep_dims = variance_99_keep_dims_0, x = var_2702_cast_fp16)[name = string("variance_99_cast_fp16")]; fp16 var_2705_to_fp16 = const()[name = string("op_2705_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2706_cast_fp16 = add(x = variance_99_cast_fp16, y = var_2705_to_fp16)[name = string("op_2706_cast_fp16")]; fp32 var_2707_epsilon_0 = const()[name = string("op_2707_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2707_cast_fp16 = rsqrt(epsilon = var_2707_epsilon_0, x = var_2706_cast_fp16)[name = string("op_2707_cast_fp16")]; tensor hidden_states_565_cast_fp16 = mul(x = hidden_states_561_cast_fp16, y = var_2707_cast_fp16)[name = string("hidden_states_565_cast_fp16")]; tensor model_model_layers_12_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_12_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(692925312)))]; tensor var_2710_cast_fp16 = mul(x = model_model_layers_12_self_attn_q_norm_weight_to_fp16, y = hidden_states_565_cast_fp16)[name = string("op_2710_cast_fp16")]; tensor q_25_perm_0 = const()[name = string("q_25_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_12_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_12_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(692925632)))]; tensor linear_85_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_12_self_attn_k_proj_weight_to_fp16, x = hidden_states_559_cast_fp16)[name = string("linear_85_cast_fp16")]; tensor concat_230x = const()[name = string("concat_230x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_569_cast_fp16 = reshape(shape = concat_230x, x = linear_85_cast_fp16)[name = string("hidden_states_569_cast_fp16")]; fp16 var_78_promoted_50_to_fp16 = const()[name = string("op_78_promoted_50_to_fp16"), val = fp16(0x1p+1)]; tensor var_2718_cast_fp16 = pow(x = hidden_states_569_cast_fp16, y = var_78_promoted_50_to_fp16)[name = string("op_2718_cast_fp16")]; tensor variance_101_axes_0 = const()[name = string("variance_101_axes_0"), val = tensor([-1])]; bool variance_101_keep_dims_0 = const()[name = string("variance_101_keep_dims_0"), val = bool(true)]; tensor variance_101_cast_fp16 = reduce_mean(axes = variance_101_axes_0, keep_dims = variance_101_keep_dims_0, x = var_2718_cast_fp16)[name = string("variance_101_cast_fp16")]; fp16 var_2721_to_fp16 = const()[name = string("op_2721_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2722_cast_fp16 = add(x = variance_101_cast_fp16, y = var_2721_to_fp16)[name = string("op_2722_cast_fp16")]; fp32 var_2723_epsilon_0 = const()[name = string("op_2723_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2723_cast_fp16 = rsqrt(epsilon = var_2723_epsilon_0, x = var_2722_cast_fp16)[name = string("op_2723_cast_fp16")]; tensor hidden_states_573_cast_fp16 = mul(x = hidden_states_569_cast_fp16, y = var_2723_cast_fp16)[name = string("hidden_states_573_cast_fp16")]; tensor model_model_layers_12_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_12_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695022848)))]; tensor var_2726_cast_fp16 = mul(x = model_model_layers_12_self_attn_k_norm_weight_to_fp16, y = hidden_states_573_cast_fp16)[name = string("op_2726_cast_fp16")]; tensor k_25_perm_0 = const()[name = string("k_25_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_12_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_12_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(695023168)))]; tensor linear_86_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_12_self_attn_v_proj_weight_to_fp16, x = hidden_states_559_cast_fp16)[name = string("linear_86_cast_fp16")]; tensor concat_231x = const()[name = string("concat_231x"), val = tensor([1, -1, 8, 128])]; tensor var_2731_cast_fp16 = reshape(shape = concat_231x, x = linear_86_cast_fp16)[name = string("op_2731_cast_fp16")]; tensor v_state_25_perm_0 = const()[name = string("v_state_25_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_25_cast_fp16 = transpose(perm = q_25_perm_0, x = var_2710_cast_fp16)[name = string("transpose_63")]; tensor var_2735_cast_fp16 = mul(x = q_25_cast_fp16, y = cos_5_cast_fp16)[name = string("op_2735_cast_fp16")]; tensor x1_49_begin_0 = const()[name = string("x1_49_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_49_end_0 = const()[name = string("x1_49_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_49_end_mask_0 = const()[name = string("x1_49_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_49_cast_fp16 = slice_by_index(begin = x1_49_begin_0, end = x1_49_end_0, end_mask = x1_49_end_mask_0, x = q_25_cast_fp16)[name = string("x1_49_cast_fp16")]; tensor x2_49_begin_0 = const()[name = string("x2_49_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_49_end_0 = const()[name = string("x2_49_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_49_end_mask_0 = const()[name = string("x2_49_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_49_cast_fp16 = slice_by_index(begin = x2_49_begin_0, end = x2_49_end_0, end_mask = x2_49_end_mask_0, x = q_25_cast_fp16)[name = string("x2_49_cast_fp16")]; fp16 const_28_promoted_to_fp16 = const()[name = string("const_28_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2746_cast_fp16 = mul(x = x2_49_cast_fp16, y = const_28_promoted_to_fp16)[name = string("op_2746_cast_fp16")]; bool var_2748_interleave_0 = const()[name = string("op_2748_interleave_0"), val = bool(false)]; tensor var_2748_cast_fp16 = concat(axis = var_72, interleave = var_2748_interleave_0, values = (var_2746_cast_fp16, x1_49_cast_fp16))[name = string("op_2748_cast_fp16")]; tensor var_2749_cast_fp16 = mul(x = var_2748_cast_fp16, y = sin_5_cast_fp16)[name = string("op_2749_cast_fp16")]; tensor query_49_cast_fp16 = add(x = var_2735_cast_fp16, y = var_2749_cast_fp16)[name = string("query_49_cast_fp16")]; tensor k_25_cast_fp16 = transpose(perm = k_25_perm_0, x = var_2726_cast_fp16)[name = string("transpose_62")]; tensor var_2751_cast_fp16 = mul(x = k_25_cast_fp16, y = cos_5_cast_fp16)[name = string("op_2751_cast_fp16")]; tensor x1_51_begin_0 = const()[name = string("x1_51_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_51_end_0 = const()[name = string("x1_51_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_51_end_mask_0 = const()[name = string("x1_51_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_51_cast_fp16 = slice_by_index(begin = x1_51_begin_0, end = x1_51_end_0, end_mask = x1_51_end_mask_0, x = k_25_cast_fp16)[name = string("x1_51_cast_fp16")]; tensor x2_51_begin_0 = const()[name = string("x2_51_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_51_end_0 = const()[name = string("x2_51_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_51_end_mask_0 = const()[name = string("x2_51_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_51_cast_fp16 = slice_by_index(begin = x2_51_begin_0, end = x2_51_end_0, end_mask = x2_51_end_mask_0, x = k_25_cast_fp16)[name = string("x2_51_cast_fp16")]; fp16 const_29_promoted_to_fp16 = const()[name = string("const_29_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2762_cast_fp16 = mul(x = x2_51_cast_fp16, y = const_29_promoted_to_fp16)[name = string("op_2762_cast_fp16")]; bool var_2764_interleave_0 = const()[name = string("op_2764_interleave_0"), val = bool(false)]; tensor var_2764_cast_fp16 = concat(axis = var_72, interleave = var_2764_interleave_0, values = (var_2762_cast_fp16, x1_51_cast_fp16))[name = string("op_2764_cast_fp16")]; tensor var_2765_cast_fp16 = mul(x = var_2764_cast_fp16, y = sin_5_cast_fp16)[name = string("op_2765_cast_fp16")]; tensor k_state_25_cast_fp16 = add(x = var_2751_cast_fp16, y = var_2765_cast_fp16)[name = string("k_state_25_cast_fp16")]; tensor expand_dims_144 = const()[name = string("expand_dims_144"), val = tensor([0])]; tensor expand_dims_145 = const()[name = string("expand_dims_145"), val = tensor([0])]; tensor expand_dims_147 = const()[name = string("expand_dims_147"), val = tensor([0])]; tensor concat_234_values0_0 = const()[name = string("concat_234_values0_0"), val = tensor([12])]; int32 concat_234_axis_0 = const()[name = string("concat_234_axis_0"), val = int32(0)]; bool concat_234_interleave_0 = const()[name = string("concat_234_interleave_0"), val = bool(false)]; tensor concat_234 = concat(axis = concat_234_axis_0, interleave = concat_234_interleave_0, values = (concat_234_values0_0, expand_dims_144, expand_dims_145, expand_dims_2, expand_dims_147))[name = string("concat_234")]; tensor key_cache_internal_tensor_assign_13_stride_0 = const()[name = string("key_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_234, begin_mask = key_cache_internal_tensor_assign_13_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_13_squeeze_mask_0, stride = key_cache_internal_tensor_assign_13_stride_0, update = k_state_25_cast_fp16, x = coreml_update_state_78)[name = string("key_cache_internal_tensor_assign_13_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_13_cast_fp16, input = key_cache)[name = string("coreml_update_state_80_write_state")]; tensor coreml_update_state_80 = read_state(input = key_cache)[name = string("coreml_update_state_80")]; tensor value_cache_internal_tensor_assign_13_stride_0 = const()[name = string("value_cache_internal_tensor_assign_13_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_13_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_13_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_13_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_13_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_25_cast_fp16 = transpose(perm = v_state_25_perm_0, x = var_2731_cast_fp16)[name = string("transpose_61")]; tensor value_cache_internal_tensor_assign_13_cast_fp16 = slice_update(begin = concat_234, begin_mask = value_cache_internal_tensor_assign_13_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_13_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_13_squeeze_mask_0, stride = value_cache_internal_tensor_assign_13_stride_0, update = v_state_25_cast_fp16, x = coreml_update_state_79)[name = string("value_cache_internal_tensor_assign_13_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_13_cast_fp16, input = value_cache)[name = string("coreml_update_state_81_write_state")]; tensor coreml_update_state_81 = read_state(input = value_cache)[name = string("coreml_update_state_81")]; tensor var_2788_begin_0 = const()[name = string("op_2788_begin_0"), val = tensor([12, 0, 0, 0, 0])]; tensor var_2788_end_0 = const()[name = string("op_2788_end_0"), val = tensor([13, 1, 8, 2048, 128])]; tensor var_2788_end_mask_0 = const()[name = string("op_2788_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2788_squeeze_mask_0 = const()[name = string("op_2788_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2788_cast_fp16 = slice_by_index(begin = var_2788_begin_0, end = var_2788_end_0, end_mask = var_2788_end_mask_0, squeeze_mask = var_2788_squeeze_mask_0, x = coreml_update_state_80)[name = string("op_2788_cast_fp16")]; tensor var_2791_begin_0 = const()[name = string("op_2791_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2791_end_mask_0 = const()[name = string("op_2791_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2791_cast_fp16 = slice_by_index(begin = var_2791_begin_0, end = concat_12, end_mask = var_2791_end_mask_0, x = var_2788_cast_fp16)[name = string("op_2791_cast_fp16")]; tensor var_2793_begin_0 = const()[name = string("op_2793_begin_0"), val = tensor([12, 0, 0, 0, 0])]; tensor var_2793_end_0 = const()[name = string("op_2793_end_0"), val = tensor([13, 1, 8, 2048, 128])]; tensor var_2793_end_mask_0 = const()[name = string("op_2793_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2793_squeeze_mask_0 = const()[name = string("op_2793_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2793_cast_fp16 = slice_by_index(begin = var_2793_begin_0, end = var_2793_end_0, end_mask = var_2793_end_mask_0, squeeze_mask = var_2793_squeeze_mask_0, x = coreml_update_state_81)[name = string("op_2793_cast_fp16")]; tensor var_2796_begin_0 = const()[name = string("op_2796_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2796_end_mask_0 = const()[name = string("op_2796_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2796_cast_fp16 = slice_by_index(begin = var_2796_begin_0, end = concat_12, end_mask = var_2796_end_mask_0, x = var_2793_cast_fp16)[name = string("op_2796_cast_fp16")]; tensor var_2798_shape_cast_fp16 = shape(x = var_2791_cast_fp16)[name = string("op_2798_shape_cast_fp16")]; int32 gather_229 = const()[name = string("gather_229"), val = int32(1)]; int32 gather_230 = const()[name = string("gather_230"), val = int32(8)]; int32 gather_231_axis_0 = const()[name = string("gather_231_axis_0"), val = int32(0)]; int32 gather_231_batch_dims_0 = const()[name = string("gather_231_batch_dims_0"), val = int32(0)]; bool gather_231_validate_indices_0 = const()[name = string("gather_231_validate_indices_0"), val = bool(false)]; string var_2798_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2798_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_231_to_uint16 = const()[name = string("select_231_to_uint16"), val = uint16(2)]; tensor var_2798_shape_cast_fp16_to_uint16 = cast(dtype = var_2798_shape_cast_fp16_to_uint16_dtype_0, x = var_2798_shape_cast_fp16)[name = string("cast_640")]; uint16 gather_231_cast_uint16 = gather(axis = gather_231_axis_0, batch_dims = gather_231_batch_dims_0, indices = select_231_to_uint16, validate_indices = gather_231_validate_indices_0, x = var_2798_shape_cast_fp16_to_uint16)[name = string("gather_231_cast_uint16")]; string gather_231_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_231_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_232 = const()[name = string("gather_232"), val = int32(128)]; tensor var_2805_axes_0 = const()[name = string("op_2805_axes_0"), val = tensor([2])]; tensor var_2805_cast_fp16 = expand_dims(axes = var_2805_axes_0, x = var_2791_cast_fp16)[name = string("op_2805_cast_fp16")]; int32 concat_242_axis_0 = const()[name = string("concat_242_axis_0"), val = int32(0)]; bool concat_242_interleave_0 = const()[name = string("concat_242_interleave_0"), val = bool(false)]; int32 gather_231_cast_uint16_to_int32 = cast(dtype = gather_231_cast_uint16_to_int32_dtype_0, x = gather_231_cast_uint16)[name = string("cast_639")]; tensor concat_242 = concat(axis = concat_242_axis_0, interleave = concat_242_interleave_0, values = (gather_229, gather_230, var_78, gather_231_cast_uint16_to_int32, gather_232))[name = string("concat_242")]; tensor shape_257_cast_fp16 = shape(x = var_2805_cast_fp16)[name = string("shape_257_cast_fp16")]; tensor real_div_24 = real_div(x = concat_242, y = shape_257_cast_fp16)[name = string("real_div_24")]; tensor hidden_states_579_cast_fp16 = tile(reps = real_div_24, x = var_2805_cast_fp16)[name = string("hidden_states_579_cast_fp16")]; tensor concat_243x = const()[name = string("concat_243x"), val = tensor([1, 16, -1, 128])]; tensor key_49_cast_fp16 = reshape(shape = concat_243x, x = hidden_states_579_cast_fp16)[name = string("key_49_cast_fp16")]; tensor var_2815_shape_cast_fp16 = shape(x = var_2796_cast_fp16)[name = string("op_2815_shape_cast_fp16")]; int32 gather_233 = const()[name = string("gather_233"), val = int32(1)]; int32 gather_234 = const()[name = string("gather_234"), val = int32(8)]; int32 gather_235_axis_0 = const()[name = string("gather_235_axis_0"), val = int32(0)]; int32 gather_235_batch_dims_0 = const()[name = string("gather_235_batch_dims_0"), val = int32(0)]; bool gather_235_validate_indices_0 = const()[name = string("gather_235_validate_indices_0"), val = bool(false)]; string var_2815_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2815_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_235_to_uint16 = const()[name = string("select_235_to_uint16"), val = uint16(2)]; tensor var_2815_shape_cast_fp16_to_uint16 = cast(dtype = var_2815_shape_cast_fp16_to_uint16_dtype_0, x = var_2815_shape_cast_fp16)[name = string("cast_638")]; uint16 gather_235_cast_uint16 = gather(axis = gather_235_axis_0, batch_dims = gather_235_batch_dims_0, indices = select_235_to_uint16, validate_indices = gather_235_validate_indices_0, x = var_2815_shape_cast_fp16_to_uint16)[name = string("gather_235_cast_uint16")]; string gather_235_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_235_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_236 = const()[name = string("gather_236"), val = int32(128)]; tensor var_2822_axes_0 = const()[name = string("op_2822_axes_0"), val = tensor([2])]; tensor var_2822_cast_fp16 = expand_dims(axes = var_2822_axes_0, x = var_2796_cast_fp16)[name = string("op_2822_cast_fp16")]; int32 concat_244_axis_0 = const()[name = string("concat_244_axis_0"), val = int32(0)]; bool concat_244_interleave_0 = const()[name = string("concat_244_interleave_0"), val = bool(false)]; int32 gather_235_cast_uint16_to_int32 = cast(dtype = gather_235_cast_uint16_to_int32_dtype_0, x = gather_235_cast_uint16)[name = string("cast_637")]; tensor concat_244 = concat(axis = concat_244_axis_0, interleave = concat_244_interleave_0, values = (gather_233, gather_234, var_78, gather_235_cast_uint16_to_int32, gather_236))[name = string("concat_244")]; tensor shape_262_cast_fp16 = shape(x = var_2822_cast_fp16)[name = string("shape_262_cast_fp16")]; tensor real_div_25 = real_div(x = concat_244, y = shape_262_cast_fp16)[name = string("real_div_25")]; tensor hidden_states_583_cast_fp16 = tile(reps = real_div_25, x = var_2822_cast_fp16)[name = string("hidden_states_583_cast_fp16")]; tensor concat_245x = const()[name = string("concat_245x"), val = tensor([1, 16, -1, 128])]; tensor value_49_cast_fp16 = reshape(shape = concat_245x, x = hidden_states_583_cast_fp16)[name = string("value_49_cast_fp16")]; tensor var_2832_shape_cast_fp16 = shape(x = key_49_cast_fp16)[name = string("op_2832_shape_cast_fp16")]; int32 gather_237_axis_0 = const()[name = string("gather_237_axis_0"), val = int32(0)]; int32 gather_237_batch_dims_0 = const()[name = string("gather_237_batch_dims_0"), val = int32(0)]; bool gather_237_validate_indices_0 = const()[name = string("gather_237_validate_indices_0"), val = bool(false)]; string var_2832_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2832_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_237_to_uint16 = const()[name = string("select_237_to_uint16"), val = uint16(2)]; tensor var_2832_shape_cast_fp16_to_uint16 = cast(dtype = var_2832_shape_cast_fp16_to_uint16_dtype_0, x = var_2832_shape_cast_fp16)[name = string("cast_636")]; uint16 gather_237_cast_uint16 = gather(axis = gather_237_axis_0, batch_dims = gather_237_batch_dims_0, indices = select_237_to_uint16, validate_indices = gather_237_validate_indices_0, x = var_2832_shape_cast_fp16_to_uint16)[name = string("gather_237_cast_uint16")]; string gather_237_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_237_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_246_values0_0 = const()[name = string("concat_246_values0_0"), val = int32(1)]; int32 concat_246_values1_0 = const()[name = string("concat_246_values1_0"), val = int32(1)]; int32 concat_246_values2_0 = const()[name = string("concat_246_values2_0"), val = int32(0)]; int32 concat_246_axis_0 = const()[name = string("concat_246_axis_0"), val = int32(0)]; bool concat_246_interleave_0 = const()[name = string("concat_246_interleave_0"), val = bool(false)]; int32 gather_237_cast_uint16_to_int32 = cast(dtype = gather_237_cast_uint16_to_int32_dtype_0, x = gather_237_cast_uint16)[name = string("cast_635")]; tensor concat_246 = concat(axis = concat_246_axis_0, interleave = concat_246_interleave_0, values = (concat_246_values0_0, concat_246_values1_0, concat_246_values2_0, gather_237_cast_uint16_to_int32))[name = string("concat_246")]; tensor attention_mask_25_begin_0 = const()[name = string("attention_mask_25_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_25_end_mask_0 = const()[name = string("attention_mask_25_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_25_cast_fp16 = slice_by_index(begin = attention_mask_25_begin_0, end = concat_246, end_mask = attention_mask_25_end_mask_0, x = causal_mask)[name = string("attention_mask_25_cast_fp16")]; tensor mul_12_cast_fp16 = mul(x = query_49_cast_fp16, y = var_85_to_fp16)[name = string("mul_12_cast_fp16")]; bool matmul_12_transpose_y_0 = const()[name = string("matmul_12_transpose_y_0"), val = bool(true)]; bool matmul_12_transpose_x_0 = const()[name = string("matmul_12_transpose_x_0"), val = bool(false)]; tensor matmul_12_cast_fp16 = matmul(transpose_x = matmul_12_transpose_x_0, transpose_y = matmul_12_transpose_y_0, x = mul_12_cast_fp16, y = key_49_cast_fp16)[name = string("matmul_12_cast_fp16")]; tensor add_250_cast_fp16 = add(x = matmul_12_cast_fp16, y = attention_mask_25_cast_fp16)[name = string("add_250_cast_fp16")]; int32 softmax_12_axis_0 = const()[name = string("softmax_12_axis_0"), val = int32(-1)]; tensor softmax_12_cast_fp16 = softmax(axis = softmax_12_axis_0, x = add_250_cast_fp16)[name = string("softmax_12_cast_fp16")]; bool attn_output_49_transpose_x_0 = const()[name = string("attn_output_49_transpose_x_0"), val = bool(false)]; bool attn_output_49_transpose_y_0 = const()[name = string("attn_output_49_transpose_y_0"), val = bool(false)]; tensor attn_output_49_cast_fp16 = matmul(transpose_x = attn_output_49_transpose_x_0, transpose_y = attn_output_49_transpose_y_0, x = softmax_12_cast_fp16, y = value_49_cast_fp16)[name = string("attn_output_49_cast_fp16")]; tensor var_2841_perm_0 = const()[name = string("op_2841_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_247_axis_0 = const()[name = string("concat_247_axis_0"), val = int32(0)]; bool concat_247_interleave_0 = const()[name = string("concat_247_interleave_0"), val = bool(false)]; int32 gather_221_cast_uint16_to_int32 = cast(dtype = gather_221_cast_uint16_to_int32_dtype_0, x = gather_221_cast_uint16)[name = string("cast_641")]; tensor concat_247 = concat(axis = concat_247_axis_0, interleave = concat_247_interleave_0, values = (gather_220, gather_221_cast_uint16_to_int32, var_72))[name = string("concat_247")]; tensor var_2841_cast_fp16 = transpose(perm = var_2841_perm_0, x = attn_output_49_cast_fp16)[name = string("transpose_60")]; tensor var_2844_cast_fp16 = reshape(shape = concat_247, x = var_2841_cast_fp16)[name = string("op_2844_cast_fp16")]; tensor model_model_layers_12_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_12_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(697120384)))]; tensor linear_87_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_12_self_attn_o_proj_weight_to_fp16, x = var_2844_cast_fp16)[name = string("linear_87_cast_fp16")]; tensor hidden_states_587_cast_fp16 = add(x = hidden_states_551_cast_fp16, y = linear_87_cast_fp16)[name = string("hidden_states_587_cast_fp16")]; fp16 var_78_promoted_51_to_fp16 = const()[name = string("op_78_promoted_51_to_fp16"), val = fp16(0x1p+1)]; tensor var_2851_cast_fp16 = pow(x = hidden_states_587_cast_fp16, y = var_78_promoted_51_to_fp16)[name = string("op_2851_cast_fp16")]; tensor variance_103_axes_0 = const()[name = string("variance_103_axes_0"), val = tensor([-1])]; bool variance_103_keep_dims_0 = const()[name = string("variance_103_keep_dims_0"), val = bool(true)]; tensor variance_103_cast_fp16 = reduce_mean(axes = variance_103_axes_0, keep_dims = variance_103_keep_dims_0, x = var_2851_cast_fp16)[name = string("variance_103_cast_fp16")]; fp16 var_2854_to_fp16 = const()[name = string("op_2854_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2855_cast_fp16 = add(x = variance_103_cast_fp16, y = var_2854_to_fp16)[name = string("op_2855_cast_fp16")]; fp32 var_2856_epsilon_0 = const()[name = string("op_2856_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2856_cast_fp16 = rsqrt(epsilon = var_2856_epsilon_0, x = var_2855_cast_fp16)[name = string("op_2856_cast_fp16")]; tensor hidden_states_591_cast_fp16 = mul(x = hidden_states_587_cast_fp16, y = var_2856_cast_fp16)[name = string("hidden_states_591_cast_fp16")]; tensor model_model_layers_12_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_12_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(701314752)))]; tensor input_99_cast_fp16 = mul(x = model_model_layers_12_post_attention_layernorm_weight_to_fp16, y = hidden_states_591_cast_fp16)[name = string("input_99_cast_fp16")]; tensor model_model_layers_12_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_12_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(701316864)))]; tensor linear_88_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_12_mlp_gate_proj_weight_to_fp16, x = input_99_cast_fp16)[name = string("linear_88_cast_fp16")]; tensor var_2868_cast_fp16 = silu(x = linear_88_cast_fp16)[name = string("op_2868_cast_fp16")]; tensor model_model_layers_12_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_12_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(707608384)))]; tensor linear_89_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_12_mlp_up_proj_weight_to_fp16, x = input_99_cast_fp16)[name = string("linear_89_cast_fp16")]; tensor input_103_cast_fp16 = mul(x = var_2868_cast_fp16, y = linear_89_cast_fp16)[name = string("input_103_cast_fp16")]; tensor model_model_layers_12_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_12_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(713899904)))]; tensor linear_90_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_12_mlp_down_proj_weight_to_fp16, x = input_103_cast_fp16)[name = string("linear_90_cast_fp16")]; tensor hidden_states_597_cast_fp16 = add(x = hidden_states_587_cast_fp16, y = linear_90_cast_fp16)[name = string("hidden_states_597_cast_fp16")]; fp16 var_78_promoted_52_to_fp16 = const()[name = string("op_78_promoted_52_to_fp16"), val = fp16(0x1p+1)]; tensor var_2881_cast_fp16 = pow(x = hidden_states_597_cast_fp16, y = var_78_promoted_52_to_fp16)[name = string("op_2881_cast_fp16")]; tensor variance_105_axes_0 = const()[name = string("variance_105_axes_0"), val = tensor([-1])]; bool variance_105_keep_dims_0 = const()[name = string("variance_105_keep_dims_0"), val = bool(true)]; tensor variance_105_cast_fp16 = reduce_mean(axes = variance_105_axes_0, keep_dims = variance_105_keep_dims_0, x = var_2881_cast_fp16)[name = string("variance_105_cast_fp16")]; fp16 var_2884_to_fp16 = const()[name = string("op_2884_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2885_cast_fp16 = add(x = variance_105_cast_fp16, y = var_2884_to_fp16)[name = string("op_2885_cast_fp16")]; fp32 var_2886_epsilon_0 = const()[name = string("op_2886_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2886_cast_fp16 = rsqrt(epsilon = var_2886_epsilon_0, x = var_2885_cast_fp16)[name = string("op_2886_cast_fp16")]; tensor hidden_states_601_cast_fp16 = mul(x = hidden_states_597_cast_fp16, y = var_2886_cast_fp16)[name = string("hidden_states_601_cast_fp16")]; tensor model_model_layers_13_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(720191424)))]; tensor hidden_states_605_cast_fp16 = mul(x = model_model_layers_13_input_layernorm_weight_to_fp16, y = hidden_states_601_cast_fp16)[name = string("hidden_states_605_cast_fp16")]; tensor var_2899_shape_cast_fp16 = shape(x = hidden_states_605_cast_fp16)[name = string("op_2899_shape_cast_fp16")]; int32 gather_238 = const()[name = string("gather_238"), val = int32(1)]; int32 gather_239_axis_0 = const()[name = string("gather_239_axis_0"), val = int32(0)]; int32 gather_239_batch_dims_0 = const()[name = string("gather_239_batch_dims_0"), val = int32(0)]; bool gather_239_validate_indices_0 = const()[name = string("gather_239_validate_indices_0"), val = bool(false)]; string var_2899_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_2899_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_239_to_uint16 = const()[name = string("select_239_to_uint16"), val = uint16(1)]; tensor var_2899_shape_cast_fp16_to_uint16 = cast(dtype = var_2899_shape_cast_fp16_to_uint16_dtype_0, x = var_2899_shape_cast_fp16)[name = string("cast_634")]; uint16 gather_239_cast_uint16 = gather(axis = gather_239_axis_0, batch_dims = gather_239_batch_dims_0, indices = select_239_to_uint16, validate_indices = gather_239_validate_indices_0, x = var_2899_shape_cast_fp16_to_uint16)[name = string("gather_239_cast_uint16")]; string gather_239_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_239_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_13_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_13_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(720193536)))]; tensor linear_91_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_13_self_attn_q_proj_weight_to_fp16, x = hidden_states_605_cast_fp16)[name = string("linear_91_cast_fp16")]; tensor concat_248x = const()[name = string("concat_248x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_607_cast_fp16 = reshape(shape = concat_248x, x = linear_91_cast_fp16)[name = string("hidden_states_607_cast_fp16")]; fp16 var_78_promoted_53_to_fp16 = const()[name = string("op_78_promoted_53_to_fp16"), val = fp16(0x1p+1)]; tensor var_2907_cast_fp16 = pow(x = hidden_states_607_cast_fp16, y = var_78_promoted_53_to_fp16)[name = string("op_2907_cast_fp16")]; tensor variance_107_axes_0 = const()[name = string("variance_107_axes_0"), val = tensor([-1])]; bool variance_107_keep_dims_0 = const()[name = string("variance_107_keep_dims_0"), val = bool(true)]; tensor variance_107_cast_fp16 = reduce_mean(axes = variance_107_axes_0, keep_dims = variance_107_keep_dims_0, x = var_2907_cast_fp16)[name = string("variance_107_cast_fp16")]; fp16 var_2910_to_fp16 = const()[name = string("op_2910_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2911_cast_fp16 = add(x = variance_107_cast_fp16, y = var_2910_to_fp16)[name = string("op_2911_cast_fp16")]; fp32 var_2912_epsilon_0 = const()[name = string("op_2912_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2912_cast_fp16 = rsqrt(epsilon = var_2912_epsilon_0, x = var_2911_cast_fp16)[name = string("op_2912_cast_fp16")]; tensor hidden_states_611_cast_fp16 = mul(x = hidden_states_607_cast_fp16, y = var_2912_cast_fp16)[name = string("hidden_states_611_cast_fp16")]; tensor model_model_layers_13_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_13_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(724387904)))]; tensor var_2915_cast_fp16 = mul(x = model_model_layers_13_self_attn_q_norm_weight_to_fp16, y = hidden_states_611_cast_fp16)[name = string("op_2915_cast_fp16")]; tensor q_27_perm_0 = const()[name = string("q_27_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_13_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_13_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(724388224)))]; tensor linear_92_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_13_self_attn_k_proj_weight_to_fp16, x = hidden_states_605_cast_fp16)[name = string("linear_92_cast_fp16")]; tensor concat_249x = const()[name = string("concat_249x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_615_cast_fp16 = reshape(shape = concat_249x, x = linear_92_cast_fp16)[name = string("hidden_states_615_cast_fp16")]; fp16 var_78_promoted_54_to_fp16 = const()[name = string("op_78_promoted_54_to_fp16"), val = fp16(0x1p+1)]; tensor var_2923_cast_fp16 = pow(x = hidden_states_615_cast_fp16, y = var_78_promoted_54_to_fp16)[name = string("op_2923_cast_fp16")]; tensor variance_109_axes_0 = const()[name = string("variance_109_axes_0"), val = tensor([-1])]; bool variance_109_keep_dims_0 = const()[name = string("variance_109_keep_dims_0"), val = bool(true)]; tensor variance_109_cast_fp16 = reduce_mean(axes = variance_109_axes_0, keep_dims = variance_109_keep_dims_0, x = var_2923_cast_fp16)[name = string("variance_109_cast_fp16")]; fp16 var_2926_to_fp16 = const()[name = string("op_2926_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_2927_cast_fp16 = add(x = variance_109_cast_fp16, y = var_2926_to_fp16)[name = string("op_2927_cast_fp16")]; fp32 var_2928_epsilon_0 = const()[name = string("op_2928_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_2928_cast_fp16 = rsqrt(epsilon = var_2928_epsilon_0, x = var_2927_cast_fp16)[name = string("op_2928_cast_fp16")]; tensor hidden_states_619_cast_fp16 = mul(x = hidden_states_615_cast_fp16, y = var_2928_cast_fp16)[name = string("hidden_states_619_cast_fp16")]; tensor model_model_layers_13_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_13_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726485440)))]; tensor var_2931_cast_fp16 = mul(x = model_model_layers_13_self_attn_k_norm_weight_to_fp16, y = hidden_states_619_cast_fp16)[name = string("op_2931_cast_fp16")]; tensor k_27_perm_0 = const()[name = string("k_27_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_13_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_13_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(726485760)))]; tensor linear_93_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_13_self_attn_v_proj_weight_to_fp16, x = hidden_states_605_cast_fp16)[name = string("linear_93_cast_fp16")]; tensor concat_250x = const()[name = string("concat_250x"), val = tensor([1, -1, 8, 128])]; tensor var_2936_cast_fp16 = reshape(shape = concat_250x, x = linear_93_cast_fp16)[name = string("op_2936_cast_fp16")]; tensor v_state_27_perm_0 = const()[name = string("v_state_27_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_27_cast_fp16 = transpose(perm = q_27_perm_0, x = var_2915_cast_fp16)[name = string("transpose_59")]; tensor var_2940_cast_fp16 = mul(x = q_27_cast_fp16, y = cos_5_cast_fp16)[name = string("op_2940_cast_fp16")]; tensor x1_53_begin_0 = const()[name = string("x1_53_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_53_end_0 = const()[name = string("x1_53_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_53_end_mask_0 = const()[name = string("x1_53_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_53_cast_fp16 = slice_by_index(begin = x1_53_begin_0, end = x1_53_end_0, end_mask = x1_53_end_mask_0, x = q_27_cast_fp16)[name = string("x1_53_cast_fp16")]; tensor x2_53_begin_0 = const()[name = string("x2_53_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_53_end_0 = const()[name = string("x2_53_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_53_end_mask_0 = const()[name = string("x2_53_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_53_cast_fp16 = slice_by_index(begin = x2_53_begin_0, end = x2_53_end_0, end_mask = x2_53_end_mask_0, x = q_27_cast_fp16)[name = string("x2_53_cast_fp16")]; fp16 const_30_promoted_to_fp16 = const()[name = string("const_30_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2951_cast_fp16 = mul(x = x2_53_cast_fp16, y = const_30_promoted_to_fp16)[name = string("op_2951_cast_fp16")]; bool var_2953_interleave_0 = const()[name = string("op_2953_interleave_0"), val = bool(false)]; tensor var_2953_cast_fp16 = concat(axis = var_72, interleave = var_2953_interleave_0, values = (var_2951_cast_fp16, x1_53_cast_fp16))[name = string("op_2953_cast_fp16")]; tensor var_2954_cast_fp16 = mul(x = var_2953_cast_fp16, y = sin_5_cast_fp16)[name = string("op_2954_cast_fp16")]; tensor query_53_cast_fp16 = add(x = var_2940_cast_fp16, y = var_2954_cast_fp16)[name = string("query_53_cast_fp16")]; tensor k_27_cast_fp16 = transpose(perm = k_27_perm_0, x = var_2931_cast_fp16)[name = string("transpose_58")]; tensor var_2956_cast_fp16 = mul(x = k_27_cast_fp16, y = cos_5_cast_fp16)[name = string("op_2956_cast_fp16")]; tensor x1_55_begin_0 = const()[name = string("x1_55_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_55_end_0 = const()[name = string("x1_55_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_55_end_mask_0 = const()[name = string("x1_55_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_55_cast_fp16 = slice_by_index(begin = x1_55_begin_0, end = x1_55_end_0, end_mask = x1_55_end_mask_0, x = k_27_cast_fp16)[name = string("x1_55_cast_fp16")]; tensor x2_55_begin_0 = const()[name = string("x2_55_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_55_end_0 = const()[name = string("x2_55_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_55_end_mask_0 = const()[name = string("x2_55_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_55_cast_fp16 = slice_by_index(begin = x2_55_begin_0, end = x2_55_end_0, end_mask = x2_55_end_mask_0, x = k_27_cast_fp16)[name = string("x2_55_cast_fp16")]; fp16 const_31_promoted_to_fp16 = const()[name = string("const_31_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_2967_cast_fp16 = mul(x = x2_55_cast_fp16, y = const_31_promoted_to_fp16)[name = string("op_2967_cast_fp16")]; bool var_2969_interleave_0 = const()[name = string("op_2969_interleave_0"), val = bool(false)]; tensor var_2969_cast_fp16 = concat(axis = var_72, interleave = var_2969_interleave_0, values = (var_2967_cast_fp16, x1_55_cast_fp16))[name = string("op_2969_cast_fp16")]; tensor var_2970_cast_fp16 = mul(x = var_2969_cast_fp16, y = sin_5_cast_fp16)[name = string("op_2970_cast_fp16")]; tensor k_state_27_cast_fp16 = add(x = var_2956_cast_fp16, y = var_2970_cast_fp16)[name = string("k_state_27_cast_fp16")]; tensor expand_dims_156 = const()[name = string("expand_dims_156"), val = tensor([0])]; tensor expand_dims_157 = const()[name = string("expand_dims_157"), val = tensor([0])]; tensor expand_dims_159 = const()[name = string("expand_dims_159"), val = tensor([0])]; tensor concat_253_values0_0 = const()[name = string("concat_253_values0_0"), val = tensor([13])]; int32 concat_253_axis_0 = const()[name = string("concat_253_axis_0"), val = int32(0)]; bool concat_253_interleave_0 = const()[name = string("concat_253_interleave_0"), val = bool(false)]; tensor concat_253 = concat(axis = concat_253_axis_0, interleave = concat_253_interleave_0, values = (concat_253_values0_0, expand_dims_156, expand_dims_157, expand_dims_2, expand_dims_159))[name = string("concat_253")]; tensor key_cache_internal_tensor_assign_14_stride_0 = const()[name = string("key_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_253, begin_mask = key_cache_internal_tensor_assign_14_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_14_squeeze_mask_0, stride = key_cache_internal_tensor_assign_14_stride_0, update = k_state_27_cast_fp16, x = coreml_update_state_80)[name = string("key_cache_internal_tensor_assign_14_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_14_cast_fp16, input = key_cache)[name = string("coreml_update_state_82_write_state")]; tensor coreml_update_state_82 = read_state(input = key_cache)[name = string("coreml_update_state_82")]; tensor value_cache_internal_tensor_assign_14_stride_0 = const()[name = string("value_cache_internal_tensor_assign_14_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_14_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_14_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_14_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_14_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_27_cast_fp16 = transpose(perm = v_state_27_perm_0, x = var_2936_cast_fp16)[name = string("transpose_57")]; tensor value_cache_internal_tensor_assign_14_cast_fp16 = slice_update(begin = concat_253, begin_mask = value_cache_internal_tensor_assign_14_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_14_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_14_squeeze_mask_0, stride = value_cache_internal_tensor_assign_14_stride_0, update = v_state_27_cast_fp16, x = coreml_update_state_81)[name = string("value_cache_internal_tensor_assign_14_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_14_cast_fp16, input = value_cache)[name = string("coreml_update_state_83_write_state")]; tensor coreml_update_state_83 = read_state(input = value_cache)[name = string("coreml_update_state_83")]; tensor var_2993_begin_0 = const()[name = string("op_2993_begin_0"), val = tensor([13, 0, 0, 0, 0])]; tensor var_2993_end_0 = const()[name = string("op_2993_end_0"), val = tensor([14, 1, 8, 2048, 128])]; tensor var_2993_end_mask_0 = const()[name = string("op_2993_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2993_squeeze_mask_0 = const()[name = string("op_2993_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2993_cast_fp16 = slice_by_index(begin = var_2993_begin_0, end = var_2993_end_0, end_mask = var_2993_end_mask_0, squeeze_mask = var_2993_squeeze_mask_0, x = coreml_update_state_82)[name = string("op_2993_cast_fp16")]; tensor var_2996_begin_0 = const()[name = string("op_2996_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_2996_end_mask_0 = const()[name = string("op_2996_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_2996_cast_fp16 = slice_by_index(begin = var_2996_begin_0, end = concat_12, end_mask = var_2996_end_mask_0, x = var_2993_cast_fp16)[name = string("op_2996_cast_fp16")]; tensor var_2998_begin_0 = const()[name = string("op_2998_begin_0"), val = tensor([13, 0, 0, 0, 0])]; tensor var_2998_end_0 = const()[name = string("op_2998_end_0"), val = tensor([14, 1, 8, 2048, 128])]; tensor var_2998_end_mask_0 = const()[name = string("op_2998_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_2998_squeeze_mask_0 = const()[name = string("op_2998_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_2998_cast_fp16 = slice_by_index(begin = var_2998_begin_0, end = var_2998_end_0, end_mask = var_2998_end_mask_0, squeeze_mask = var_2998_squeeze_mask_0, x = coreml_update_state_83)[name = string("op_2998_cast_fp16")]; tensor var_3001_begin_0 = const()[name = string("op_3001_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3001_end_mask_0 = const()[name = string("op_3001_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3001_cast_fp16 = slice_by_index(begin = var_3001_begin_0, end = concat_12, end_mask = var_3001_end_mask_0, x = var_2998_cast_fp16)[name = string("op_3001_cast_fp16")]; tensor var_3003_shape_cast_fp16 = shape(x = var_2996_cast_fp16)[name = string("op_3003_shape_cast_fp16")]; int32 gather_247 = const()[name = string("gather_247"), val = int32(1)]; int32 gather_248 = const()[name = string("gather_248"), val = int32(8)]; int32 gather_249_axis_0 = const()[name = string("gather_249_axis_0"), val = int32(0)]; int32 gather_249_batch_dims_0 = const()[name = string("gather_249_batch_dims_0"), val = int32(0)]; bool gather_249_validate_indices_0 = const()[name = string("gather_249_validate_indices_0"), val = bool(false)]; string var_3003_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3003_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_249_to_uint16 = const()[name = string("select_249_to_uint16"), val = uint16(2)]; tensor var_3003_shape_cast_fp16_to_uint16 = cast(dtype = var_3003_shape_cast_fp16_to_uint16_dtype_0, x = var_3003_shape_cast_fp16)[name = string("cast_632")]; uint16 gather_249_cast_uint16 = gather(axis = gather_249_axis_0, batch_dims = gather_249_batch_dims_0, indices = select_249_to_uint16, validate_indices = gather_249_validate_indices_0, x = var_3003_shape_cast_fp16_to_uint16)[name = string("gather_249_cast_uint16")]; string gather_249_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_249_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_250 = const()[name = string("gather_250"), val = int32(128)]; tensor var_3010_axes_0 = const()[name = string("op_3010_axes_0"), val = tensor([2])]; tensor var_3010_cast_fp16 = expand_dims(axes = var_3010_axes_0, x = var_2996_cast_fp16)[name = string("op_3010_cast_fp16")]; int32 concat_261_axis_0 = const()[name = string("concat_261_axis_0"), val = int32(0)]; bool concat_261_interleave_0 = const()[name = string("concat_261_interleave_0"), val = bool(false)]; int32 gather_249_cast_uint16_to_int32 = cast(dtype = gather_249_cast_uint16_to_int32_dtype_0, x = gather_249_cast_uint16)[name = string("cast_631")]; tensor concat_261 = concat(axis = concat_261_axis_0, interleave = concat_261_interleave_0, values = (gather_247, gather_248, var_78, gather_249_cast_uint16_to_int32, gather_250))[name = string("concat_261")]; tensor shape_277_cast_fp16 = shape(x = var_3010_cast_fp16)[name = string("shape_277_cast_fp16")]; tensor real_div_26 = real_div(x = concat_261, y = shape_277_cast_fp16)[name = string("real_div_26")]; tensor hidden_states_625_cast_fp16 = tile(reps = real_div_26, x = var_3010_cast_fp16)[name = string("hidden_states_625_cast_fp16")]; tensor concat_262x = const()[name = string("concat_262x"), val = tensor([1, 16, -1, 128])]; tensor key_53_cast_fp16 = reshape(shape = concat_262x, x = hidden_states_625_cast_fp16)[name = string("key_53_cast_fp16")]; tensor var_3020_shape_cast_fp16 = shape(x = var_3001_cast_fp16)[name = string("op_3020_shape_cast_fp16")]; int32 gather_251 = const()[name = string("gather_251"), val = int32(1)]; int32 gather_252 = const()[name = string("gather_252"), val = int32(8)]; int32 gather_253_axis_0 = const()[name = string("gather_253_axis_0"), val = int32(0)]; int32 gather_253_batch_dims_0 = const()[name = string("gather_253_batch_dims_0"), val = int32(0)]; bool gather_253_validate_indices_0 = const()[name = string("gather_253_validate_indices_0"), val = bool(false)]; string var_3020_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3020_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_253_to_uint16 = const()[name = string("select_253_to_uint16"), val = uint16(2)]; tensor var_3020_shape_cast_fp16_to_uint16 = cast(dtype = var_3020_shape_cast_fp16_to_uint16_dtype_0, x = var_3020_shape_cast_fp16)[name = string("cast_630")]; uint16 gather_253_cast_uint16 = gather(axis = gather_253_axis_0, batch_dims = gather_253_batch_dims_0, indices = select_253_to_uint16, validate_indices = gather_253_validate_indices_0, x = var_3020_shape_cast_fp16_to_uint16)[name = string("gather_253_cast_uint16")]; string gather_253_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_253_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_254 = const()[name = string("gather_254"), val = int32(128)]; tensor var_3027_axes_0 = const()[name = string("op_3027_axes_0"), val = tensor([2])]; tensor var_3027_cast_fp16 = expand_dims(axes = var_3027_axes_0, x = var_3001_cast_fp16)[name = string("op_3027_cast_fp16")]; int32 concat_263_axis_0 = const()[name = string("concat_263_axis_0"), val = int32(0)]; bool concat_263_interleave_0 = const()[name = string("concat_263_interleave_0"), val = bool(false)]; int32 gather_253_cast_uint16_to_int32 = cast(dtype = gather_253_cast_uint16_to_int32_dtype_0, x = gather_253_cast_uint16)[name = string("cast_629")]; tensor concat_263 = concat(axis = concat_263_axis_0, interleave = concat_263_interleave_0, values = (gather_251, gather_252, var_78, gather_253_cast_uint16_to_int32, gather_254))[name = string("concat_263")]; tensor shape_282_cast_fp16 = shape(x = var_3027_cast_fp16)[name = string("shape_282_cast_fp16")]; tensor real_div_27 = real_div(x = concat_263, y = shape_282_cast_fp16)[name = string("real_div_27")]; tensor hidden_states_629_cast_fp16 = tile(reps = real_div_27, x = var_3027_cast_fp16)[name = string("hidden_states_629_cast_fp16")]; tensor concat_264x = const()[name = string("concat_264x"), val = tensor([1, 16, -1, 128])]; tensor value_53_cast_fp16 = reshape(shape = concat_264x, x = hidden_states_629_cast_fp16)[name = string("value_53_cast_fp16")]; tensor var_3037_shape_cast_fp16 = shape(x = key_53_cast_fp16)[name = string("op_3037_shape_cast_fp16")]; int32 gather_255_axis_0 = const()[name = string("gather_255_axis_0"), val = int32(0)]; int32 gather_255_batch_dims_0 = const()[name = string("gather_255_batch_dims_0"), val = int32(0)]; bool gather_255_validate_indices_0 = const()[name = string("gather_255_validate_indices_0"), val = bool(false)]; string var_3037_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3037_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_255_to_uint16 = const()[name = string("select_255_to_uint16"), val = uint16(2)]; tensor var_3037_shape_cast_fp16_to_uint16 = cast(dtype = var_3037_shape_cast_fp16_to_uint16_dtype_0, x = var_3037_shape_cast_fp16)[name = string("cast_628")]; uint16 gather_255_cast_uint16 = gather(axis = gather_255_axis_0, batch_dims = gather_255_batch_dims_0, indices = select_255_to_uint16, validate_indices = gather_255_validate_indices_0, x = var_3037_shape_cast_fp16_to_uint16)[name = string("gather_255_cast_uint16")]; string gather_255_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_255_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_265_values0_0 = const()[name = string("concat_265_values0_0"), val = int32(1)]; int32 concat_265_values1_0 = const()[name = string("concat_265_values1_0"), val = int32(1)]; int32 concat_265_values2_0 = const()[name = string("concat_265_values2_0"), val = int32(0)]; int32 concat_265_axis_0 = const()[name = string("concat_265_axis_0"), val = int32(0)]; bool concat_265_interleave_0 = const()[name = string("concat_265_interleave_0"), val = bool(false)]; int32 gather_255_cast_uint16_to_int32 = cast(dtype = gather_255_cast_uint16_to_int32_dtype_0, x = gather_255_cast_uint16)[name = string("cast_627")]; tensor concat_265 = concat(axis = concat_265_axis_0, interleave = concat_265_interleave_0, values = (concat_265_values0_0, concat_265_values1_0, concat_265_values2_0, gather_255_cast_uint16_to_int32))[name = string("concat_265")]; tensor attention_mask_27_begin_0 = const()[name = string("attention_mask_27_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_27_end_mask_0 = const()[name = string("attention_mask_27_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_27_cast_fp16 = slice_by_index(begin = attention_mask_27_begin_0, end = concat_265, end_mask = attention_mask_27_end_mask_0, x = causal_mask)[name = string("attention_mask_27_cast_fp16")]; tensor mul_13_cast_fp16 = mul(x = query_53_cast_fp16, y = var_85_to_fp16)[name = string("mul_13_cast_fp16")]; bool matmul_13_transpose_y_0 = const()[name = string("matmul_13_transpose_y_0"), val = bool(true)]; bool matmul_13_transpose_x_0 = const()[name = string("matmul_13_transpose_x_0"), val = bool(false)]; tensor matmul_13_cast_fp16 = matmul(transpose_x = matmul_13_transpose_x_0, transpose_y = matmul_13_transpose_y_0, x = mul_13_cast_fp16, y = key_53_cast_fp16)[name = string("matmul_13_cast_fp16")]; tensor add_269_cast_fp16 = add(x = matmul_13_cast_fp16, y = attention_mask_27_cast_fp16)[name = string("add_269_cast_fp16")]; int32 softmax_13_axis_0 = const()[name = string("softmax_13_axis_0"), val = int32(-1)]; tensor softmax_13_cast_fp16 = softmax(axis = softmax_13_axis_0, x = add_269_cast_fp16)[name = string("softmax_13_cast_fp16")]; bool attn_output_53_transpose_x_0 = const()[name = string("attn_output_53_transpose_x_0"), val = bool(false)]; bool attn_output_53_transpose_y_0 = const()[name = string("attn_output_53_transpose_y_0"), val = bool(false)]; tensor attn_output_53_cast_fp16 = matmul(transpose_x = attn_output_53_transpose_x_0, transpose_y = attn_output_53_transpose_y_0, x = softmax_13_cast_fp16, y = value_53_cast_fp16)[name = string("attn_output_53_cast_fp16")]; tensor var_3046_perm_0 = const()[name = string("op_3046_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_266_axis_0 = const()[name = string("concat_266_axis_0"), val = int32(0)]; bool concat_266_interleave_0 = const()[name = string("concat_266_interleave_0"), val = bool(false)]; int32 gather_239_cast_uint16_to_int32 = cast(dtype = gather_239_cast_uint16_to_int32_dtype_0, x = gather_239_cast_uint16)[name = string("cast_633")]; tensor concat_266 = concat(axis = concat_266_axis_0, interleave = concat_266_interleave_0, values = (gather_238, gather_239_cast_uint16_to_int32, var_72))[name = string("concat_266")]; tensor var_3046_cast_fp16 = transpose(perm = var_3046_perm_0, x = attn_output_53_cast_fp16)[name = string("transpose_56")]; tensor var_3049_cast_fp16 = reshape(shape = concat_266, x = var_3046_cast_fp16)[name = string("op_3049_cast_fp16")]; tensor model_model_layers_13_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_13_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(728582976)))]; tensor linear_94_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_13_self_attn_o_proj_weight_to_fp16, x = var_3049_cast_fp16)[name = string("linear_94_cast_fp16")]; tensor hidden_states_633_cast_fp16 = add(x = hidden_states_597_cast_fp16, y = linear_94_cast_fp16)[name = string("hidden_states_633_cast_fp16")]; fp16 var_78_promoted_55_to_fp16 = const()[name = string("op_78_promoted_55_to_fp16"), val = fp16(0x1p+1)]; tensor var_3056_cast_fp16 = pow(x = hidden_states_633_cast_fp16, y = var_78_promoted_55_to_fp16)[name = string("op_3056_cast_fp16")]; tensor variance_111_axes_0 = const()[name = string("variance_111_axes_0"), val = tensor([-1])]; bool variance_111_keep_dims_0 = const()[name = string("variance_111_keep_dims_0"), val = bool(true)]; tensor variance_111_cast_fp16 = reduce_mean(axes = variance_111_axes_0, keep_dims = variance_111_keep_dims_0, x = var_3056_cast_fp16)[name = string("variance_111_cast_fp16")]; fp16 var_3059_to_fp16 = const()[name = string("op_3059_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3060_cast_fp16 = add(x = variance_111_cast_fp16, y = var_3059_to_fp16)[name = string("op_3060_cast_fp16")]; fp32 var_3061_epsilon_0 = const()[name = string("op_3061_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3061_cast_fp16 = rsqrt(epsilon = var_3061_epsilon_0, x = var_3060_cast_fp16)[name = string("op_3061_cast_fp16")]; tensor hidden_states_637_cast_fp16 = mul(x = hidden_states_633_cast_fp16, y = var_3061_cast_fp16)[name = string("hidden_states_637_cast_fp16")]; tensor model_model_layers_13_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_13_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(732777344)))]; tensor input_107_cast_fp16 = mul(x = model_model_layers_13_post_attention_layernorm_weight_to_fp16, y = hidden_states_637_cast_fp16)[name = string("input_107_cast_fp16")]; tensor model_model_layers_13_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_13_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(732779456)))]; tensor linear_95_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_13_mlp_gate_proj_weight_to_fp16, x = input_107_cast_fp16)[name = string("linear_95_cast_fp16")]; tensor var_3073_cast_fp16 = silu(x = linear_95_cast_fp16)[name = string("op_3073_cast_fp16")]; tensor model_model_layers_13_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_13_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(739070976)))]; tensor linear_96_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_13_mlp_up_proj_weight_to_fp16, x = input_107_cast_fp16)[name = string("linear_96_cast_fp16")]; tensor input_111_cast_fp16 = mul(x = var_3073_cast_fp16, y = linear_96_cast_fp16)[name = string("input_111_cast_fp16")]; tensor model_model_layers_13_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_13_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(745362496)))]; tensor linear_97_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_13_mlp_down_proj_weight_to_fp16, x = input_111_cast_fp16)[name = string("linear_97_cast_fp16")]; tensor hidden_states_643_cast_fp16 = add(x = hidden_states_633_cast_fp16, y = linear_97_cast_fp16)[name = string("hidden_states_643_cast_fp16")]; fp16 var_78_promoted_56_to_fp16 = const()[name = string("op_78_promoted_56_to_fp16"), val = fp16(0x1p+1)]; tensor var_3086_cast_fp16 = pow(x = hidden_states_643_cast_fp16, y = var_78_promoted_56_to_fp16)[name = string("op_3086_cast_fp16")]; tensor variance_113_axes_0 = const()[name = string("variance_113_axes_0"), val = tensor([-1])]; bool variance_113_keep_dims_0 = const()[name = string("variance_113_keep_dims_0"), val = bool(true)]; tensor variance_113_cast_fp16 = reduce_mean(axes = variance_113_axes_0, keep_dims = variance_113_keep_dims_0, x = var_3086_cast_fp16)[name = string("variance_113_cast_fp16")]; fp16 var_3089_to_fp16 = const()[name = string("op_3089_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3090_cast_fp16 = add(x = variance_113_cast_fp16, y = var_3089_to_fp16)[name = string("op_3090_cast_fp16")]; fp32 var_3091_epsilon_0 = const()[name = string("op_3091_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3091_cast_fp16 = rsqrt(epsilon = var_3091_epsilon_0, x = var_3090_cast_fp16)[name = string("op_3091_cast_fp16")]; tensor hidden_states_647_cast_fp16 = mul(x = hidden_states_643_cast_fp16, y = var_3091_cast_fp16)[name = string("hidden_states_647_cast_fp16")]; tensor model_model_layers_14_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(751654016)))]; tensor hidden_states_651_cast_fp16 = mul(x = model_model_layers_14_input_layernorm_weight_to_fp16, y = hidden_states_647_cast_fp16)[name = string("hidden_states_651_cast_fp16")]; tensor var_3104_shape_cast_fp16 = shape(x = hidden_states_651_cast_fp16)[name = string("op_3104_shape_cast_fp16")]; int32 gather_256 = const()[name = string("gather_256"), val = int32(1)]; int32 gather_257_axis_0 = const()[name = string("gather_257_axis_0"), val = int32(0)]; int32 gather_257_batch_dims_0 = const()[name = string("gather_257_batch_dims_0"), val = int32(0)]; bool gather_257_validate_indices_0 = const()[name = string("gather_257_validate_indices_0"), val = bool(false)]; string var_3104_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3104_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_257_to_uint16 = const()[name = string("select_257_to_uint16"), val = uint16(1)]; tensor var_3104_shape_cast_fp16_to_uint16 = cast(dtype = var_3104_shape_cast_fp16_to_uint16_dtype_0, x = var_3104_shape_cast_fp16)[name = string("cast_626")]; uint16 gather_257_cast_uint16 = gather(axis = gather_257_axis_0, batch_dims = gather_257_batch_dims_0, indices = select_257_to_uint16, validate_indices = gather_257_validate_indices_0, x = var_3104_shape_cast_fp16_to_uint16)[name = string("gather_257_cast_uint16")]; string gather_257_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_257_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_14_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_14_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(751656128)))]; tensor linear_98_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_14_self_attn_q_proj_weight_to_fp16, x = hidden_states_651_cast_fp16)[name = string("linear_98_cast_fp16")]; tensor concat_267x = const()[name = string("concat_267x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_653_cast_fp16 = reshape(shape = concat_267x, x = linear_98_cast_fp16)[name = string("hidden_states_653_cast_fp16")]; fp16 var_78_promoted_57_to_fp16 = const()[name = string("op_78_promoted_57_to_fp16"), val = fp16(0x1p+1)]; tensor var_3112_cast_fp16 = pow(x = hidden_states_653_cast_fp16, y = var_78_promoted_57_to_fp16)[name = string("op_3112_cast_fp16")]; tensor variance_115_axes_0 = const()[name = string("variance_115_axes_0"), val = tensor([-1])]; bool variance_115_keep_dims_0 = const()[name = string("variance_115_keep_dims_0"), val = bool(true)]; tensor variance_115_cast_fp16 = reduce_mean(axes = variance_115_axes_0, keep_dims = variance_115_keep_dims_0, x = var_3112_cast_fp16)[name = string("variance_115_cast_fp16")]; fp16 var_3115_to_fp16 = const()[name = string("op_3115_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3116_cast_fp16 = add(x = variance_115_cast_fp16, y = var_3115_to_fp16)[name = string("op_3116_cast_fp16")]; fp32 var_3117_epsilon_0 = const()[name = string("op_3117_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3117_cast_fp16 = rsqrt(epsilon = var_3117_epsilon_0, x = var_3116_cast_fp16)[name = string("op_3117_cast_fp16")]; tensor hidden_states_657_cast_fp16 = mul(x = hidden_states_653_cast_fp16, y = var_3117_cast_fp16)[name = string("hidden_states_657_cast_fp16")]; tensor model_model_layers_14_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_14_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755850496)))]; tensor var_3120_cast_fp16 = mul(x = model_model_layers_14_self_attn_q_norm_weight_to_fp16, y = hidden_states_657_cast_fp16)[name = string("op_3120_cast_fp16")]; tensor q_29_perm_0 = const()[name = string("q_29_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_14_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_14_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(755850816)))]; tensor linear_99_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_14_self_attn_k_proj_weight_to_fp16, x = hidden_states_651_cast_fp16)[name = string("linear_99_cast_fp16")]; tensor concat_268x = const()[name = string("concat_268x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_661_cast_fp16 = reshape(shape = concat_268x, x = linear_99_cast_fp16)[name = string("hidden_states_661_cast_fp16")]; fp16 var_78_promoted_58_to_fp16 = const()[name = string("op_78_promoted_58_to_fp16"), val = fp16(0x1p+1)]; tensor var_3128_cast_fp16 = pow(x = hidden_states_661_cast_fp16, y = var_78_promoted_58_to_fp16)[name = string("op_3128_cast_fp16")]; tensor variance_117_axes_0 = const()[name = string("variance_117_axes_0"), val = tensor([-1])]; bool variance_117_keep_dims_0 = const()[name = string("variance_117_keep_dims_0"), val = bool(true)]; tensor variance_117_cast_fp16 = reduce_mean(axes = variance_117_axes_0, keep_dims = variance_117_keep_dims_0, x = var_3128_cast_fp16)[name = string("variance_117_cast_fp16")]; fp16 var_3131_to_fp16 = const()[name = string("op_3131_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3132_cast_fp16 = add(x = variance_117_cast_fp16, y = var_3131_to_fp16)[name = string("op_3132_cast_fp16")]; fp32 var_3133_epsilon_0 = const()[name = string("op_3133_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3133_cast_fp16 = rsqrt(epsilon = var_3133_epsilon_0, x = var_3132_cast_fp16)[name = string("op_3133_cast_fp16")]; tensor hidden_states_665_cast_fp16 = mul(x = hidden_states_661_cast_fp16, y = var_3133_cast_fp16)[name = string("hidden_states_665_cast_fp16")]; tensor model_model_layers_14_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_14_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(757948032)))]; tensor var_3136_cast_fp16 = mul(x = model_model_layers_14_self_attn_k_norm_weight_to_fp16, y = hidden_states_665_cast_fp16)[name = string("op_3136_cast_fp16")]; tensor k_29_perm_0 = const()[name = string("k_29_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_14_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_14_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(757948352)))]; tensor linear_100_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_14_self_attn_v_proj_weight_to_fp16, x = hidden_states_651_cast_fp16)[name = string("linear_100_cast_fp16")]; tensor concat_269x = const()[name = string("concat_269x"), val = tensor([1, -1, 8, 128])]; tensor var_3141_cast_fp16 = reshape(shape = concat_269x, x = linear_100_cast_fp16)[name = string("op_3141_cast_fp16")]; tensor v_state_29_perm_0 = const()[name = string("v_state_29_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_29_cast_fp16 = transpose(perm = q_29_perm_0, x = var_3120_cast_fp16)[name = string("transpose_55")]; tensor var_3145_cast_fp16 = mul(x = q_29_cast_fp16, y = cos_5_cast_fp16)[name = string("op_3145_cast_fp16")]; tensor x1_57_begin_0 = const()[name = string("x1_57_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_57_end_0 = const()[name = string("x1_57_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_57_end_mask_0 = const()[name = string("x1_57_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_57_cast_fp16 = slice_by_index(begin = x1_57_begin_0, end = x1_57_end_0, end_mask = x1_57_end_mask_0, x = q_29_cast_fp16)[name = string("x1_57_cast_fp16")]; tensor x2_57_begin_0 = const()[name = string("x2_57_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_57_end_0 = const()[name = string("x2_57_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_57_end_mask_0 = const()[name = string("x2_57_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_57_cast_fp16 = slice_by_index(begin = x2_57_begin_0, end = x2_57_end_0, end_mask = x2_57_end_mask_0, x = q_29_cast_fp16)[name = string("x2_57_cast_fp16")]; fp16 const_32_promoted_to_fp16 = const()[name = string("const_32_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3156_cast_fp16 = mul(x = x2_57_cast_fp16, y = const_32_promoted_to_fp16)[name = string("op_3156_cast_fp16")]; bool var_3158_interleave_0 = const()[name = string("op_3158_interleave_0"), val = bool(false)]; tensor var_3158_cast_fp16 = concat(axis = var_72, interleave = var_3158_interleave_0, values = (var_3156_cast_fp16, x1_57_cast_fp16))[name = string("op_3158_cast_fp16")]; tensor var_3159_cast_fp16 = mul(x = var_3158_cast_fp16, y = sin_5_cast_fp16)[name = string("op_3159_cast_fp16")]; tensor query_57_cast_fp16 = add(x = var_3145_cast_fp16, y = var_3159_cast_fp16)[name = string("query_57_cast_fp16")]; tensor k_29_cast_fp16 = transpose(perm = k_29_perm_0, x = var_3136_cast_fp16)[name = string("transpose_54")]; tensor var_3161_cast_fp16 = mul(x = k_29_cast_fp16, y = cos_5_cast_fp16)[name = string("op_3161_cast_fp16")]; tensor x1_59_begin_0 = const()[name = string("x1_59_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_59_end_0 = const()[name = string("x1_59_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_59_end_mask_0 = const()[name = string("x1_59_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_59_cast_fp16 = slice_by_index(begin = x1_59_begin_0, end = x1_59_end_0, end_mask = x1_59_end_mask_0, x = k_29_cast_fp16)[name = string("x1_59_cast_fp16")]; tensor x2_59_begin_0 = const()[name = string("x2_59_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_59_end_0 = const()[name = string("x2_59_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_59_end_mask_0 = const()[name = string("x2_59_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_59_cast_fp16 = slice_by_index(begin = x2_59_begin_0, end = x2_59_end_0, end_mask = x2_59_end_mask_0, x = k_29_cast_fp16)[name = string("x2_59_cast_fp16")]; fp16 const_33_promoted_to_fp16 = const()[name = string("const_33_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3172_cast_fp16 = mul(x = x2_59_cast_fp16, y = const_33_promoted_to_fp16)[name = string("op_3172_cast_fp16")]; bool var_3174_interleave_0 = const()[name = string("op_3174_interleave_0"), val = bool(false)]; tensor var_3174_cast_fp16 = concat(axis = var_72, interleave = var_3174_interleave_0, values = (var_3172_cast_fp16, x1_59_cast_fp16))[name = string("op_3174_cast_fp16")]; tensor var_3175_cast_fp16 = mul(x = var_3174_cast_fp16, y = sin_5_cast_fp16)[name = string("op_3175_cast_fp16")]; tensor k_state_29_cast_fp16 = add(x = var_3161_cast_fp16, y = var_3175_cast_fp16)[name = string("k_state_29_cast_fp16")]; tensor expand_dims_168 = const()[name = string("expand_dims_168"), val = tensor([0])]; tensor expand_dims_169 = const()[name = string("expand_dims_169"), val = tensor([0])]; tensor expand_dims_171 = const()[name = string("expand_dims_171"), val = tensor([0])]; tensor concat_272_values0_0 = const()[name = string("concat_272_values0_0"), val = tensor([14])]; int32 concat_272_axis_0 = const()[name = string("concat_272_axis_0"), val = int32(0)]; bool concat_272_interleave_0 = const()[name = string("concat_272_interleave_0"), val = bool(false)]; tensor concat_272 = concat(axis = concat_272_axis_0, interleave = concat_272_interleave_0, values = (concat_272_values0_0, expand_dims_168, expand_dims_169, expand_dims_2, expand_dims_171))[name = string("concat_272")]; tensor key_cache_internal_tensor_assign_15_stride_0 = const()[name = string("key_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_272, begin_mask = key_cache_internal_tensor_assign_15_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_15_squeeze_mask_0, stride = key_cache_internal_tensor_assign_15_stride_0, update = k_state_29_cast_fp16, x = coreml_update_state_82)[name = string("key_cache_internal_tensor_assign_15_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_15_cast_fp16, input = key_cache)[name = string("coreml_update_state_84_write_state")]; tensor coreml_update_state_84 = read_state(input = key_cache)[name = string("coreml_update_state_84")]; tensor value_cache_internal_tensor_assign_15_stride_0 = const()[name = string("value_cache_internal_tensor_assign_15_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_15_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_15_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_15_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_15_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_29_cast_fp16 = transpose(perm = v_state_29_perm_0, x = var_3141_cast_fp16)[name = string("transpose_53")]; tensor value_cache_internal_tensor_assign_15_cast_fp16 = slice_update(begin = concat_272, begin_mask = value_cache_internal_tensor_assign_15_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_15_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_15_squeeze_mask_0, stride = value_cache_internal_tensor_assign_15_stride_0, update = v_state_29_cast_fp16, x = coreml_update_state_83)[name = string("value_cache_internal_tensor_assign_15_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_15_cast_fp16, input = value_cache)[name = string("coreml_update_state_85_write_state")]; tensor coreml_update_state_85 = read_state(input = value_cache)[name = string("coreml_update_state_85")]; tensor var_3198_begin_0 = const()[name = string("op_3198_begin_0"), val = tensor([14, 0, 0, 0, 0])]; tensor var_3198_end_0 = const()[name = string("op_3198_end_0"), val = tensor([15, 1, 8, 2048, 128])]; tensor var_3198_end_mask_0 = const()[name = string("op_3198_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3198_squeeze_mask_0 = const()[name = string("op_3198_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_3198_cast_fp16 = slice_by_index(begin = var_3198_begin_0, end = var_3198_end_0, end_mask = var_3198_end_mask_0, squeeze_mask = var_3198_squeeze_mask_0, x = coreml_update_state_84)[name = string("op_3198_cast_fp16")]; tensor var_3201_begin_0 = const()[name = string("op_3201_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3201_end_mask_0 = const()[name = string("op_3201_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3201_cast_fp16 = slice_by_index(begin = var_3201_begin_0, end = concat_12, end_mask = var_3201_end_mask_0, x = var_3198_cast_fp16)[name = string("op_3201_cast_fp16")]; tensor var_3203_begin_0 = const()[name = string("op_3203_begin_0"), val = tensor([14, 0, 0, 0, 0])]; tensor var_3203_end_0 = const()[name = string("op_3203_end_0"), val = tensor([15, 1, 8, 2048, 128])]; tensor var_3203_end_mask_0 = const()[name = string("op_3203_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3203_squeeze_mask_0 = const()[name = string("op_3203_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_3203_cast_fp16 = slice_by_index(begin = var_3203_begin_0, end = var_3203_end_0, end_mask = var_3203_end_mask_0, squeeze_mask = var_3203_squeeze_mask_0, x = coreml_update_state_85)[name = string("op_3203_cast_fp16")]; tensor var_3206_begin_0 = const()[name = string("op_3206_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3206_end_mask_0 = const()[name = string("op_3206_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3206_cast_fp16 = slice_by_index(begin = var_3206_begin_0, end = concat_12, end_mask = var_3206_end_mask_0, x = var_3203_cast_fp16)[name = string("op_3206_cast_fp16")]; tensor var_3208_shape_cast_fp16 = shape(x = var_3201_cast_fp16)[name = string("op_3208_shape_cast_fp16")]; int32 gather_265 = const()[name = string("gather_265"), val = int32(1)]; int32 gather_266 = const()[name = string("gather_266"), val = int32(8)]; int32 gather_267_axis_0 = const()[name = string("gather_267_axis_0"), val = int32(0)]; int32 gather_267_batch_dims_0 = const()[name = string("gather_267_batch_dims_0"), val = int32(0)]; bool gather_267_validate_indices_0 = const()[name = string("gather_267_validate_indices_0"), val = bool(false)]; string var_3208_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3208_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_267_to_uint16 = const()[name = string("select_267_to_uint16"), val = uint16(2)]; tensor var_3208_shape_cast_fp16_to_uint16 = cast(dtype = var_3208_shape_cast_fp16_to_uint16_dtype_0, x = var_3208_shape_cast_fp16)[name = string("cast_624")]; uint16 gather_267_cast_uint16 = gather(axis = gather_267_axis_0, batch_dims = gather_267_batch_dims_0, indices = select_267_to_uint16, validate_indices = gather_267_validate_indices_0, x = var_3208_shape_cast_fp16_to_uint16)[name = string("gather_267_cast_uint16")]; string gather_267_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_267_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_268 = const()[name = string("gather_268"), val = int32(128)]; tensor var_3215_axes_0 = const()[name = string("op_3215_axes_0"), val = tensor([2])]; tensor var_3215_cast_fp16 = expand_dims(axes = var_3215_axes_0, x = var_3201_cast_fp16)[name = string("op_3215_cast_fp16")]; int32 concat_280_axis_0 = const()[name = string("concat_280_axis_0"), val = int32(0)]; bool concat_280_interleave_0 = const()[name = string("concat_280_interleave_0"), val = bool(false)]; int32 gather_267_cast_uint16_to_int32 = cast(dtype = gather_267_cast_uint16_to_int32_dtype_0, x = gather_267_cast_uint16)[name = string("cast_623")]; tensor concat_280 = concat(axis = concat_280_axis_0, interleave = concat_280_interleave_0, values = (gather_265, gather_266, var_78, gather_267_cast_uint16_to_int32, gather_268))[name = string("concat_280")]; tensor shape_297_cast_fp16 = shape(x = var_3215_cast_fp16)[name = string("shape_297_cast_fp16")]; tensor real_div_28 = real_div(x = concat_280, y = shape_297_cast_fp16)[name = string("real_div_28")]; tensor hidden_states_671_cast_fp16 = tile(reps = real_div_28, x = var_3215_cast_fp16)[name = string("hidden_states_671_cast_fp16")]; tensor concat_281x = const()[name = string("concat_281x"), val = tensor([1, 16, -1, 128])]; tensor key_57_cast_fp16 = reshape(shape = concat_281x, x = hidden_states_671_cast_fp16)[name = string("key_57_cast_fp16")]; tensor var_3225_shape_cast_fp16 = shape(x = var_3206_cast_fp16)[name = string("op_3225_shape_cast_fp16")]; int32 gather_269 = const()[name = string("gather_269"), val = int32(1)]; int32 gather_270 = const()[name = string("gather_270"), val = int32(8)]; int32 gather_271_axis_0 = const()[name = string("gather_271_axis_0"), val = int32(0)]; int32 gather_271_batch_dims_0 = const()[name = string("gather_271_batch_dims_0"), val = int32(0)]; bool gather_271_validate_indices_0 = const()[name = string("gather_271_validate_indices_0"), val = bool(false)]; string var_3225_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3225_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_271_to_uint16 = const()[name = string("select_271_to_uint16"), val = uint16(2)]; tensor var_3225_shape_cast_fp16_to_uint16 = cast(dtype = var_3225_shape_cast_fp16_to_uint16_dtype_0, x = var_3225_shape_cast_fp16)[name = string("cast_622")]; uint16 gather_271_cast_uint16 = gather(axis = gather_271_axis_0, batch_dims = gather_271_batch_dims_0, indices = select_271_to_uint16, validate_indices = gather_271_validate_indices_0, x = var_3225_shape_cast_fp16_to_uint16)[name = string("gather_271_cast_uint16")]; string gather_271_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_271_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_272 = const()[name = string("gather_272"), val = int32(128)]; tensor var_3232_axes_0 = const()[name = string("op_3232_axes_0"), val = tensor([2])]; tensor var_3232_cast_fp16 = expand_dims(axes = var_3232_axes_0, x = var_3206_cast_fp16)[name = string("op_3232_cast_fp16")]; int32 concat_282_axis_0 = const()[name = string("concat_282_axis_0"), val = int32(0)]; bool concat_282_interleave_0 = const()[name = string("concat_282_interleave_0"), val = bool(false)]; int32 gather_271_cast_uint16_to_int32 = cast(dtype = gather_271_cast_uint16_to_int32_dtype_0, x = gather_271_cast_uint16)[name = string("cast_621")]; tensor concat_282 = concat(axis = concat_282_axis_0, interleave = concat_282_interleave_0, values = (gather_269, gather_270, var_78, gather_271_cast_uint16_to_int32, gather_272))[name = string("concat_282")]; tensor shape_302_cast_fp16 = shape(x = var_3232_cast_fp16)[name = string("shape_302_cast_fp16")]; tensor real_div_29 = real_div(x = concat_282, y = shape_302_cast_fp16)[name = string("real_div_29")]; tensor hidden_states_675_cast_fp16 = tile(reps = real_div_29, x = var_3232_cast_fp16)[name = string("hidden_states_675_cast_fp16")]; tensor concat_283x = const()[name = string("concat_283x"), val = tensor([1, 16, -1, 128])]; tensor value_57_cast_fp16 = reshape(shape = concat_283x, x = hidden_states_675_cast_fp16)[name = string("value_57_cast_fp16")]; tensor var_3242_shape_cast_fp16 = shape(x = key_57_cast_fp16)[name = string("op_3242_shape_cast_fp16")]; int32 gather_273_axis_0 = const()[name = string("gather_273_axis_0"), val = int32(0)]; int32 gather_273_batch_dims_0 = const()[name = string("gather_273_batch_dims_0"), val = int32(0)]; bool gather_273_validate_indices_0 = const()[name = string("gather_273_validate_indices_0"), val = bool(false)]; string var_3242_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3242_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_273_to_uint16 = const()[name = string("select_273_to_uint16"), val = uint16(2)]; tensor var_3242_shape_cast_fp16_to_uint16 = cast(dtype = var_3242_shape_cast_fp16_to_uint16_dtype_0, x = var_3242_shape_cast_fp16)[name = string("cast_620")]; uint16 gather_273_cast_uint16 = gather(axis = gather_273_axis_0, batch_dims = gather_273_batch_dims_0, indices = select_273_to_uint16, validate_indices = gather_273_validate_indices_0, x = var_3242_shape_cast_fp16_to_uint16)[name = string("gather_273_cast_uint16")]; string gather_273_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_273_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_284_values0_0 = const()[name = string("concat_284_values0_0"), val = int32(1)]; int32 concat_284_values1_0 = const()[name = string("concat_284_values1_0"), val = int32(1)]; int32 concat_284_values2_0 = const()[name = string("concat_284_values2_0"), val = int32(0)]; int32 concat_284_axis_0 = const()[name = string("concat_284_axis_0"), val = int32(0)]; bool concat_284_interleave_0 = const()[name = string("concat_284_interleave_0"), val = bool(false)]; int32 gather_273_cast_uint16_to_int32 = cast(dtype = gather_273_cast_uint16_to_int32_dtype_0, x = gather_273_cast_uint16)[name = string("cast_619")]; tensor concat_284 = concat(axis = concat_284_axis_0, interleave = concat_284_interleave_0, values = (concat_284_values0_0, concat_284_values1_0, concat_284_values2_0, gather_273_cast_uint16_to_int32))[name = string("concat_284")]; tensor attention_mask_29_begin_0 = const()[name = string("attention_mask_29_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_29_end_mask_0 = const()[name = string("attention_mask_29_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_29_cast_fp16 = slice_by_index(begin = attention_mask_29_begin_0, end = concat_284, end_mask = attention_mask_29_end_mask_0, x = causal_mask)[name = string("attention_mask_29_cast_fp16")]; tensor mul_14_cast_fp16 = mul(x = query_57_cast_fp16, y = var_85_to_fp16)[name = string("mul_14_cast_fp16")]; bool matmul_14_transpose_y_0 = const()[name = string("matmul_14_transpose_y_0"), val = bool(true)]; bool matmul_14_transpose_x_0 = const()[name = string("matmul_14_transpose_x_0"), val = bool(false)]; tensor matmul_14_cast_fp16 = matmul(transpose_x = matmul_14_transpose_x_0, transpose_y = matmul_14_transpose_y_0, x = mul_14_cast_fp16, y = key_57_cast_fp16)[name = string("matmul_14_cast_fp16")]; tensor add_288_cast_fp16 = add(x = matmul_14_cast_fp16, y = attention_mask_29_cast_fp16)[name = string("add_288_cast_fp16")]; int32 softmax_14_axis_0 = const()[name = string("softmax_14_axis_0"), val = int32(-1)]; tensor softmax_14_cast_fp16 = softmax(axis = softmax_14_axis_0, x = add_288_cast_fp16)[name = string("softmax_14_cast_fp16")]; bool attn_output_57_transpose_x_0 = const()[name = string("attn_output_57_transpose_x_0"), val = bool(false)]; bool attn_output_57_transpose_y_0 = const()[name = string("attn_output_57_transpose_y_0"), val = bool(false)]; tensor attn_output_57_cast_fp16 = matmul(transpose_x = attn_output_57_transpose_x_0, transpose_y = attn_output_57_transpose_y_0, x = softmax_14_cast_fp16, y = value_57_cast_fp16)[name = string("attn_output_57_cast_fp16")]; tensor var_3251_perm_0 = const()[name = string("op_3251_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_285_axis_0 = const()[name = string("concat_285_axis_0"), val = int32(0)]; bool concat_285_interleave_0 = const()[name = string("concat_285_interleave_0"), val = bool(false)]; int32 gather_257_cast_uint16_to_int32 = cast(dtype = gather_257_cast_uint16_to_int32_dtype_0, x = gather_257_cast_uint16)[name = string("cast_625")]; tensor concat_285 = concat(axis = concat_285_axis_0, interleave = concat_285_interleave_0, values = (gather_256, gather_257_cast_uint16_to_int32, var_72))[name = string("concat_285")]; tensor var_3251_cast_fp16 = transpose(perm = var_3251_perm_0, x = attn_output_57_cast_fp16)[name = string("transpose_52")]; tensor var_3254_cast_fp16 = reshape(shape = concat_285, x = var_3251_cast_fp16)[name = string("op_3254_cast_fp16")]; tensor model_model_layers_14_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_14_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(760045568)))]; tensor linear_101_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_14_self_attn_o_proj_weight_to_fp16, x = var_3254_cast_fp16)[name = string("linear_101_cast_fp16")]; tensor hidden_states_679_cast_fp16 = add(x = hidden_states_643_cast_fp16, y = linear_101_cast_fp16)[name = string("hidden_states_679_cast_fp16")]; fp16 var_78_promoted_59_to_fp16 = const()[name = string("op_78_promoted_59_to_fp16"), val = fp16(0x1p+1)]; tensor var_3261_cast_fp16 = pow(x = hidden_states_679_cast_fp16, y = var_78_promoted_59_to_fp16)[name = string("op_3261_cast_fp16")]; tensor variance_119_axes_0 = const()[name = string("variance_119_axes_0"), val = tensor([-1])]; bool variance_119_keep_dims_0 = const()[name = string("variance_119_keep_dims_0"), val = bool(true)]; tensor variance_119_cast_fp16 = reduce_mean(axes = variance_119_axes_0, keep_dims = variance_119_keep_dims_0, x = var_3261_cast_fp16)[name = string("variance_119_cast_fp16")]; fp16 var_3264_to_fp16 = const()[name = string("op_3264_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3265_cast_fp16 = add(x = variance_119_cast_fp16, y = var_3264_to_fp16)[name = string("op_3265_cast_fp16")]; fp32 var_3266_epsilon_0 = const()[name = string("op_3266_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3266_cast_fp16 = rsqrt(epsilon = var_3266_epsilon_0, x = var_3265_cast_fp16)[name = string("op_3266_cast_fp16")]; tensor hidden_states_683_cast_fp16 = mul(x = hidden_states_679_cast_fp16, y = var_3266_cast_fp16)[name = string("hidden_states_683_cast_fp16")]; tensor model_model_layers_14_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_14_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764239936)))]; tensor input_115_cast_fp16 = mul(x = model_model_layers_14_post_attention_layernorm_weight_to_fp16, y = hidden_states_683_cast_fp16)[name = string("input_115_cast_fp16")]; tensor model_model_layers_14_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_14_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(764242048)))]; tensor linear_102_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_14_mlp_gate_proj_weight_to_fp16, x = input_115_cast_fp16)[name = string("linear_102_cast_fp16")]; tensor var_3278_cast_fp16 = silu(x = linear_102_cast_fp16)[name = string("op_3278_cast_fp16")]; tensor model_model_layers_14_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_14_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(770533568)))]; tensor linear_103_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_14_mlp_up_proj_weight_to_fp16, x = input_115_cast_fp16)[name = string("linear_103_cast_fp16")]; tensor input_119_cast_fp16 = mul(x = var_3278_cast_fp16, y = linear_103_cast_fp16)[name = string("input_119_cast_fp16")]; tensor model_model_layers_14_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_14_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(776825088)))]; tensor linear_104_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_14_mlp_down_proj_weight_to_fp16, x = input_119_cast_fp16)[name = string("linear_104_cast_fp16")]; tensor hidden_states_689_cast_fp16 = add(x = hidden_states_679_cast_fp16, y = linear_104_cast_fp16)[name = string("hidden_states_689_cast_fp16")]; fp16 var_78_promoted_60_to_fp16 = const()[name = string("op_78_promoted_60_to_fp16"), val = fp16(0x1p+1)]; tensor var_3291_cast_fp16 = pow(x = hidden_states_689_cast_fp16, y = var_78_promoted_60_to_fp16)[name = string("op_3291_cast_fp16")]; tensor variance_121_axes_0 = const()[name = string("variance_121_axes_0"), val = tensor([-1])]; bool variance_121_keep_dims_0 = const()[name = string("variance_121_keep_dims_0"), val = bool(true)]; tensor variance_121_cast_fp16 = reduce_mean(axes = variance_121_axes_0, keep_dims = variance_121_keep_dims_0, x = var_3291_cast_fp16)[name = string("variance_121_cast_fp16")]; fp16 var_3294_to_fp16 = const()[name = string("op_3294_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3295_cast_fp16 = add(x = variance_121_cast_fp16, y = var_3294_to_fp16)[name = string("op_3295_cast_fp16")]; fp32 var_3296_epsilon_0 = const()[name = string("op_3296_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3296_cast_fp16 = rsqrt(epsilon = var_3296_epsilon_0, x = var_3295_cast_fp16)[name = string("op_3296_cast_fp16")]; tensor hidden_states_693_cast_fp16 = mul(x = hidden_states_689_cast_fp16, y = var_3296_cast_fp16)[name = string("hidden_states_693_cast_fp16")]; tensor model_model_layers_15_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(783116608)))]; tensor hidden_states_697_cast_fp16 = mul(x = model_model_layers_15_input_layernorm_weight_to_fp16, y = hidden_states_693_cast_fp16)[name = string("hidden_states_697_cast_fp16")]; tensor var_3309_shape_cast_fp16 = shape(x = hidden_states_697_cast_fp16)[name = string("op_3309_shape_cast_fp16")]; int32 gather_274 = const()[name = string("gather_274"), val = int32(1)]; int32 gather_275_axis_0 = const()[name = string("gather_275_axis_0"), val = int32(0)]; int32 gather_275_batch_dims_0 = const()[name = string("gather_275_batch_dims_0"), val = int32(0)]; bool gather_275_validate_indices_0 = const()[name = string("gather_275_validate_indices_0"), val = bool(false)]; string var_3309_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3309_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_275_to_uint16 = const()[name = string("select_275_to_uint16"), val = uint16(1)]; tensor var_3309_shape_cast_fp16_to_uint16 = cast(dtype = var_3309_shape_cast_fp16_to_uint16_dtype_0, x = var_3309_shape_cast_fp16)[name = string("cast_618")]; uint16 gather_275_cast_uint16 = gather(axis = gather_275_axis_0, batch_dims = gather_275_batch_dims_0, indices = select_275_to_uint16, validate_indices = gather_275_validate_indices_0, x = var_3309_shape_cast_fp16_to_uint16)[name = string("gather_275_cast_uint16")]; string gather_275_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_275_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_15_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_15_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(783118720)))]; tensor linear_105_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_15_self_attn_q_proj_weight_to_fp16, x = hidden_states_697_cast_fp16)[name = string("linear_105_cast_fp16")]; tensor concat_286x = const()[name = string("concat_286x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_699_cast_fp16 = reshape(shape = concat_286x, x = linear_105_cast_fp16)[name = string("hidden_states_699_cast_fp16")]; fp16 var_78_promoted_61_to_fp16 = const()[name = string("op_78_promoted_61_to_fp16"), val = fp16(0x1p+1)]; tensor var_3317_cast_fp16 = pow(x = hidden_states_699_cast_fp16, y = var_78_promoted_61_to_fp16)[name = string("op_3317_cast_fp16")]; tensor variance_123_axes_0 = const()[name = string("variance_123_axes_0"), val = tensor([-1])]; bool variance_123_keep_dims_0 = const()[name = string("variance_123_keep_dims_0"), val = bool(true)]; tensor variance_123_cast_fp16 = reduce_mean(axes = variance_123_axes_0, keep_dims = variance_123_keep_dims_0, x = var_3317_cast_fp16)[name = string("variance_123_cast_fp16")]; fp16 var_3320_to_fp16 = const()[name = string("op_3320_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3321_cast_fp16 = add(x = variance_123_cast_fp16, y = var_3320_to_fp16)[name = string("op_3321_cast_fp16")]; fp32 var_3322_epsilon_0 = const()[name = string("op_3322_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3322_cast_fp16 = rsqrt(epsilon = var_3322_epsilon_0, x = var_3321_cast_fp16)[name = string("op_3322_cast_fp16")]; tensor hidden_states_703_cast_fp16 = mul(x = hidden_states_699_cast_fp16, y = var_3322_cast_fp16)[name = string("hidden_states_703_cast_fp16")]; tensor model_model_layers_15_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_15_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(787313088)))]; tensor var_3325_cast_fp16 = mul(x = model_model_layers_15_self_attn_q_norm_weight_to_fp16, y = hidden_states_703_cast_fp16)[name = string("op_3325_cast_fp16")]; tensor q_31_perm_0 = const()[name = string("q_31_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_15_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_15_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(787313408)))]; tensor linear_106_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_15_self_attn_k_proj_weight_to_fp16, x = hidden_states_697_cast_fp16)[name = string("linear_106_cast_fp16")]; tensor concat_287x = const()[name = string("concat_287x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_707_cast_fp16 = reshape(shape = concat_287x, x = linear_106_cast_fp16)[name = string("hidden_states_707_cast_fp16")]; fp16 var_78_promoted_62_to_fp16 = const()[name = string("op_78_promoted_62_to_fp16"), val = fp16(0x1p+1)]; tensor var_3333_cast_fp16 = pow(x = hidden_states_707_cast_fp16, y = var_78_promoted_62_to_fp16)[name = string("op_3333_cast_fp16")]; tensor variance_125_axes_0 = const()[name = string("variance_125_axes_0"), val = tensor([-1])]; bool variance_125_keep_dims_0 = const()[name = string("variance_125_keep_dims_0"), val = bool(true)]; tensor variance_125_cast_fp16 = reduce_mean(axes = variance_125_axes_0, keep_dims = variance_125_keep_dims_0, x = var_3333_cast_fp16)[name = string("variance_125_cast_fp16")]; fp16 var_3336_to_fp16 = const()[name = string("op_3336_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3337_cast_fp16 = add(x = variance_125_cast_fp16, y = var_3336_to_fp16)[name = string("op_3337_cast_fp16")]; fp32 var_3338_epsilon_0 = const()[name = string("op_3338_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3338_cast_fp16 = rsqrt(epsilon = var_3338_epsilon_0, x = var_3337_cast_fp16)[name = string("op_3338_cast_fp16")]; tensor hidden_states_711_cast_fp16 = mul(x = hidden_states_707_cast_fp16, y = var_3338_cast_fp16)[name = string("hidden_states_711_cast_fp16")]; tensor model_model_layers_15_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_15_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(789410624)))]; tensor var_3341_cast_fp16 = mul(x = model_model_layers_15_self_attn_k_norm_weight_to_fp16, y = hidden_states_711_cast_fp16)[name = string("op_3341_cast_fp16")]; tensor k_31_perm_0 = const()[name = string("k_31_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_15_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_15_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(789410944)))]; tensor linear_107_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_15_self_attn_v_proj_weight_to_fp16, x = hidden_states_697_cast_fp16)[name = string("linear_107_cast_fp16")]; tensor concat_288x = const()[name = string("concat_288x"), val = tensor([1, -1, 8, 128])]; tensor var_3346_cast_fp16 = reshape(shape = concat_288x, x = linear_107_cast_fp16)[name = string("op_3346_cast_fp16")]; tensor v_state_31_perm_0 = const()[name = string("v_state_31_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_31_cast_fp16 = transpose(perm = q_31_perm_0, x = var_3325_cast_fp16)[name = string("transpose_51")]; tensor var_3350_cast_fp16 = mul(x = q_31_cast_fp16, y = cos_5_cast_fp16)[name = string("op_3350_cast_fp16")]; tensor x1_61_begin_0 = const()[name = string("x1_61_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_61_end_0 = const()[name = string("x1_61_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_61_end_mask_0 = const()[name = string("x1_61_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_61_cast_fp16 = slice_by_index(begin = x1_61_begin_0, end = x1_61_end_0, end_mask = x1_61_end_mask_0, x = q_31_cast_fp16)[name = string("x1_61_cast_fp16")]; tensor x2_61_begin_0 = const()[name = string("x2_61_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_61_end_0 = const()[name = string("x2_61_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_61_end_mask_0 = const()[name = string("x2_61_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_61_cast_fp16 = slice_by_index(begin = x2_61_begin_0, end = x2_61_end_0, end_mask = x2_61_end_mask_0, x = q_31_cast_fp16)[name = string("x2_61_cast_fp16")]; fp16 const_34_promoted_to_fp16 = const()[name = string("const_34_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3361_cast_fp16 = mul(x = x2_61_cast_fp16, y = const_34_promoted_to_fp16)[name = string("op_3361_cast_fp16")]; bool var_3363_interleave_0 = const()[name = string("op_3363_interleave_0"), val = bool(false)]; tensor var_3363_cast_fp16 = concat(axis = var_72, interleave = var_3363_interleave_0, values = (var_3361_cast_fp16, x1_61_cast_fp16))[name = string("op_3363_cast_fp16")]; tensor var_3364_cast_fp16 = mul(x = var_3363_cast_fp16, y = sin_5_cast_fp16)[name = string("op_3364_cast_fp16")]; tensor query_61_cast_fp16 = add(x = var_3350_cast_fp16, y = var_3364_cast_fp16)[name = string("query_61_cast_fp16")]; tensor k_31_cast_fp16 = transpose(perm = k_31_perm_0, x = var_3341_cast_fp16)[name = string("transpose_50")]; tensor var_3366_cast_fp16 = mul(x = k_31_cast_fp16, y = cos_5_cast_fp16)[name = string("op_3366_cast_fp16")]; tensor x1_63_begin_0 = const()[name = string("x1_63_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_63_end_0 = const()[name = string("x1_63_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_63_end_mask_0 = const()[name = string("x1_63_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_63_cast_fp16 = slice_by_index(begin = x1_63_begin_0, end = x1_63_end_0, end_mask = x1_63_end_mask_0, x = k_31_cast_fp16)[name = string("x1_63_cast_fp16")]; tensor x2_63_begin_0 = const()[name = string("x2_63_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_63_end_0 = const()[name = string("x2_63_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_63_end_mask_0 = const()[name = string("x2_63_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_63_cast_fp16 = slice_by_index(begin = x2_63_begin_0, end = x2_63_end_0, end_mask = x2_63_end_mask_0, x = k_31_cast_fp16)[name = string("x2_63_cast_fp16")]; fp16 const_35_promoted_to_fp16 = const()[name = string("const_35_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3377_cast_fp16 = mul(x = x2_63_cast_fp16, y = const_35_promoted_to_fp16)[name = string("op_3377_cast_fp16")]; bool var_3379_interleave_0 = const()[name = string("op_3379_interleave_0"), val = bool(false)]; tensor var_3379_cast_fp16 = concat(axis = var_72, interleave = var_3379_interleave_0, values = (var_3377_cast_fp16, x1_63_cast_fp16))[name = string("op_3379_cast_fp16")]; tensor var_3380_cast_fp16 = mul(x = var_3379_cast_fp16, y = sin_5_cast_fp16)[name = string("op_3380_cast_fp16")]; tensor k_state_31_cast_fp16 = add(x = var_3366_cast_fp16, y = var_3380_cast_fp16)[name = string("k_state_31_cast_fp16")]; tensor expand_dims_180 = const()[name = string("expand_dims_180"), val = tensor([0])]; tensor expand_dims_181 = const()[name = string("expand_dims_181"), val = tensor([0])]; tensor expand_dims_183 = const()[name = string("expand_dims_183"), val = tensor([0])]; tensor concat_291_values0_0 = const()[name = string("concat_291_values0_0"), val = tensor([15])]; int32 concat_291_axis_0 = const()[name = string("concat_291_axis_0"), val = int32(0)]; bool concat_291_interleave_0 = const()[name = string("concat_291_interleave_0"), val = bool(false)]; tensor concat_291 = concat(axis = concat_291_axis_0, interleave = concat_291_interleave_0, values = (concat_291_values0_0, expand_dims_180, expand_dims_181, expand_dims_2, expand_dims_183))[name = string("concat_291")]; tensor key_cache_internal_tensor_assign_16_stride_0 = const()[name = string("key_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_291, begin_mask = key_cache_internal_tensor_assign_16_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_16_squeeze_mask_0, stride = key_cache_internal_tensor_assign_16_stride_0, update = k_state_31_cast_fp16, x = coreml_update_state_84)[name = string("key_cache_internal_tensor_assign_16_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_16_cast_fp16, input = key_cache)[name = string("coreml_update_state_86_write_state")]; tensor coreml_update_state_86 = read_state(input = key_cache)[name = string("coreml_update_state_86")]; tensor value_cache_internal_tensor_assign_16_stride_0 = const()[name = string("value_cache_internal_tensor_assign_16_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_16_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_16_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_16_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_16_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_31_cast_fp16 = transpose(perm = v_state_31_perm_0, x = var_3346_cast_fp16)[name = string("transpose_49")]; tensor value_cache_internal_tensor_assign_16_cast_fp16 = slice_update(begin = concat_291, begin_mask = value_cache_internal_tensor_assign_16_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_16_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_16_squeeze_mask_0, stride = value_cache_internal_tensor_assign_16_stride_0, update = v_state_31_cast_fp16, x = coreml_update_state_85)[name = string("value_cache_internal_tensor_assign_16_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_16_cast_fp16, input = value_cache)[name = string("coreml_update_state_87_write_state")]; tensor coreml_update_state_87 = read_state(input = value_cache)[name = string("coreml_update_state_87")]; tensor var_3403_begin_0 = const()[name = string("op_3403_begin_0"), val = tensor([15, 0, 0, 0, 0])]; tensor var_3403_end_0 = const()[name = string("op_3403_end_0"), val = tensor([16, 1, 8, 2048, 128])]; tensor var_3403_end_mask_0 = const()[name = string("op_3403_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3403_squeeze_mask_0 = const()[name = string("op_3403_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_3403_cast_fp16 = slice_by_index(begin = var_3403_begin_0, end = var_3403_end_0, end_mask = var_3403_end_mask_0, squeeze_mask = var_3403_squeeze_mask_0, x = coreml_update_state_86)[name = string("op_3403_cast_fp16")]; tensor var_3406_begin_0 = const()[name = string("op_3406_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3406_end_mask_0 = const()[name = string("op_3406_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3406_cast_fp16 = slice_by_index(begin = var_3406_begin_0, end = concat_12, end_mask = var_3406_end_mask_0, x = var_3403_cast_fp16)[name = string("op_3406_cast_fp16")]; tensor var_3408_begin_0 = const()[name = string("op_3408_begin_0"), val = tensor([15, 0, 0, 0, 0])]; tensor var_3408_end_0 = const()[name = string("op_3408_end_0"), val = tensor([16, 1, 8, 2048, 128])]; tensor var_3408_end_mask_0 = const()[name = string("op_3408_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3408_squeeze_mask_0 = const()[name = string("op_3408_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_3408_cast_fp16 = slice_by_index(begin = var_3408_begin_0, end = var_3408_end_0, end_mask = var_3408_end_mask_0, squeeze_mask = var_3408_squeeze_mask_0, x = coreml_update_state_87)[name = string("op_3408_cast_fp16")]; tensor var_3411_begin_0 = const()[name = string("op_3411_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3411_end_mask_0 = const()[name = string("op_3411_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3411_cast_fp16 = slice_by_index(begin = var_3411_begin_0, end = concat_12, end_mask = var_3411_end_mask_0, x = var_3408_cast_fp16)[name = string("op_3411_cast_fp16")]; tensor var_3413_shape_cast_fp16 = shape(x = var_3406_cast_fp16)[name = string("op_3413_shape_cast_fp16")]; int32 gather_283 = const()[name = string("gather_283"), val = int32(1)]; int32 gather_284 = const()[name = string("gather_284"), val = int32(8)]; int32 gather_285_axis_0 = const()[name = string("gather_285_axis_0"), val = int32(0)]; int32 gather_285_batch_dims_0 = const()[name = string("gather_285_batch_dims_0"), val = int32(0)]; bool gather_285_validate_indices_0 = const()[name = string("gather_285_validate_indices_0"), val = bool(false)]; string var_3413_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3413_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_285_to_uint16 = const()[name = string("select_285_to_uint16"), val = uint16(2)]; tensor var_3413_shape_cast_fp16_to_uint16 = cast(dtype = var_3413_shape_cast_fp16_to_uint16_dtype_0, x = var_3413_shape_cast_fp16)[name = string("cast_616")]; uint16 gather_285_cast_uint16 = gather(axis = gather_285_axis_0, batch_dims = gather_285_batch_dims_0, indices = select_285_to_uint16, validate_indices = gather_285_validate_indices_0, x = var_3413_shape_cast_fp16_to_uint16)[name = string("gather_285_cast_uint16")]; string gather_285_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_285_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_286 = const()[name = string("gather_286"), val = int32(128)]; tensor var_3420_axes_0 = const()[name = string("op_3420_axes_0"), val = tensor([2])]; tensor var_3420_cast_fp16 = expand_dims(axes = var_3420_axes_0, x = var_3406_cast_fp16)[name = string("op_3420_cast_fp16")]; int32 concat_299_axis_0 = const()[name = string("concat_299_axis_0"), val = int32(0)]; bool concat_299_interleave_0 = const()[name = string("concat_299_interleave_0"), val = bool(false)]; int32 gather_285_cast_uint16_to_int32 = cast(dtype = gather_285_cast_uint16_to_int32_dtype_0, x = gather_285_cast_uint16)[name = string("cast_615")]; tensor concat_299 = concat(axis = concat_299_axis_0, interleave = concat_299_interleave_0, values = (gather_283, gather_284, var_78, gather_285_cast_uint16_to_int32, gather_286))[name = string("concat_299")]; tensor shape_317_cast_fp16 = shape(x = var_3420_cast_fp16)[name = string("shape_317_cast_fp16")]; tensor real_div_30 = real_div(x = concat_299, y = shape_317_cast_fp16)[name = string("real_div_30")]; tensor hidden_states_717_cast_fp16 = tile(reps = real_div_30, x = var_3420_cast_fp16)[name = string("hidden_states_717_cast_fp16")]; tensor concat_300x = const()[name = string("concat_300x"), val = tensor([1, 16, -1, 128])]; tensor key_61_cast_fp16 = reshape(shape = concat_300x, x = hidden_states_717_cast_fp16)[name = string("key_61_cast_fp16")]; tensor var_3430_shape_cast_fp16 = shape(x = var_3411_cast_fp16)[name = string("op_3430_shape_cast_fp16")]; int32 gather_287 = const()[name = string("gather_287"), val = int32(1)]; int32 gather_288 = const()[name = string("gather_288"), val = int32(8)]; int32 gather_289_axis_0 = const()[name = string("gather_289_axis_0"), val = int32(0)]; int32 gather_289_batch_dims_0 = const()[name = string("gather_289_batch_dims_0"), val = int32(0)]; bool gather_289_validate_indices_0 = const()[name = string("gather_289_validate_indices_0"), val = bool(false)]; string var_3430_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3430_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_289_to_uint16 = const()[name = string("select_289_to_uint16"), val = uint16(2)]; tensor var_3430_shape_cast_fp16_to_uint16 = cast(dtype = var_3430_shape_cast_fp16_to_uint16_dtype_0, x = var_3430_shape_cast_fp16)[name = string("cast_614")]; uint16 gather_289_cast_uint16 = gather(axis = gather_289_axis_0, batch_dims = gather_289_batch_dims_0, indices = select_289_to_uint16, validate_indices = gather_289_validate_indices_0, x = var_3430_shape_cast_fp16_to_uint16)[name = string("gather_289_cast_uint16")]; string gather_289_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_289_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_290 = const()[name = string("gather_290"), val = int32(128)]; tensor var_3437_axes_0 = const()[name = string("op_3437_axes_0"), val = tensor([2])]; tensor var_3437_cast_fp16 = expand_dims(axes = var_3437_axes_0, x = var_3411_cast_fp16)[name = string("op_3437_cast_fp16")]; int32 concat_301_axis_0 = const()[name = string("concat_301_axis_0"), val = int32(0)]; bool concat_301_interleave_0 = const()[name = string("concat_301_interleave_0"), val = bool(false)]; int32 gather_289_cast_uint16_to_int32 = cast(dtype = gather_289_cast_uint16_to_int32_dtype_0, x = gather_289_cast_uint16)[name = string("cast_613")]; tensor concat_301 = concat(axis = concat_301_axis_0, interleave = concat_301_interleave_0, values = (gather_287, gather_288, var_78, gather_289_cast_uint16_to_int32, gather_290))[name = string("concat_301")]; tensor shape_322_cast_fp16 = shape(x = var_3437_cast_fp16)[name = string("shape_322_cast_fp16")]; tensor real_div_31 = real_div(x = concat_301, y = shape_322_cast_fp16)[name = string("real_div_31")]; tensor hidden_states_721_cast_fp16 = tile(reps = real_div_31, x = var_3437_cast_fp16)[name = string("hidden_states_721_cast_fp16")]; tensor concat_302x = const()[name = string("concat_302x"), val = tensor([1, 16, -1, 128])]; tensor value_61_cast_fp16 = reshape(shape = concat_302x, x = hidden_states_721_cast_fp16)[name = string("value_61_cast_fp16")]; tensor var_3447_shape_cast_fp16 = shape(x = key_61_cast_fp16)[name = string("op_3447_shape_cast_fp16")]; int32 gather_291_axis_0 = const()[name = string("gather_291_axis_0"), val = int32(0)]; int32 gather_291_batch_dims_0 = const()[name = string("gather_291_batch_dims_0"), val = int32(0)]; bool gather_291_validate_indices_0 = const()[name = string("gather_291_validate_indices_0"), val = bool(false)]; string var_3447_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3447_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_291_to_uint16 = const()[name = string("select_291_to_uint16"), val = uint16(2)]; tensor var_3447_shape_cast_fp16_to_uint16 = cast(dtype = var_3447_shape_cast_fp16_to_uint16_dtype_0, x = var_3447_shape_cast_fp16)[name = string("cast_612")]; uint16 gather_291_cast_uint16 = gather(axis = gather_291_axis_0, batch_dims = gather_291_batch_dims_0, indices = select_291_to_uint16, validate_indices = gather_291_validate_indices_0, x = var_3447_shape_cast_fp16_to_uint16)[name = string("gather_291_cast_uint16")]; string gather_291_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_291_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_303_values0_0 = const()[name = string("concat_303_values0_0"), val = int32(1)]; int32 concat_303_values1_0 = const()[name = string("concat_303_values1_0"), val = int32(1)]; int32 concat_303_values2_0 = const()[name = string("concat_303_values2_0"), val = int32(0)]; int32 concat_303_axis_0 = const()[name = string("concat_303_axis_0"), val = int32(0)]; bool concat_303_interleave_0 = const()[name = string("concat_303_interleave_0"), val = bool(false)]; int32 gather_291_cast_uint16_to_int32 = cast(dtype = gather_291_cast_uint16_to_int32_dtype_0, x = gather_291_cast_uint16)[name = string("cast_611")]; tensor concat_303 = concat(axis = concat_303_axis_0, interleave = concat_303_interleave_0, values = (concat_303_values0_0, concat_303_values1_0, concat_303_values2_0, gather_291_cast_uint16_to_int32))[name = string("concat_303")]; tensor attention_mask_31_begin_0 = const()[name = string("attention_mask_31_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_31_end_mask_0 = const()[name = string("attention_mask_31_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_31_cast_fp16 = slice_by_index(begin = attention_mask_31_begin_0, end = concat_303, end_mask = attention_mask_31_end_mask_0, x = causal_mask)[name = string("attention_mask_31_cast_fp16")]; tensor mul_15_cast_fp16 = mul(x = query_61_cast_fp16, y = var_85_to_fp16)[name = string("mul_15_cast_fp16")]; bool matmul_15_transpose_y_0 = const()[name = string("matmul_15_transpose_y_0"), val = bool(true)]; bool matmul_15_transpose_x_0 = const()[name = string("matmul_15_transpose_x_0"), val = bool(false)]; tensor matmul_15_cast_fp16 = matmul(transpose_x = matmul_15_transpose_x_0, transpose_y = matmul_15_transpose_y_0, x = mul_15_cast_fp16, y = key_61_cast_fp16)[name = string("matmul_15_cast_fp16")]; tensor add_307_cast_fp16 = add(x = matmul_15_cast_fp16, y = attention_mask_31_cast_fp16)[name = string("add_307_cast_fp16")]; int32 softmax_15_axis_0 = const()[name = string("softmax_15_axis_0"), val = int32(-1)]; tensor softmax_15_cast_fp16 = softmax(axis = softmax_15_axis_0, x = add_307_cast_fp16)[name = string("softmax_15_cast_fp16")]; bool attn_output_61_transpose_x_0 = const()[name = string("attn_output_61_transpose_x_0"), val = bool(false)]; bool attn_output_61_transpose_y_0 = const()[name = string("attn_output_61_transpose_y_0"), val = bool(false)]; tensor attn_output_61_cast_fp16 = matmul(transpose_x = attn_output_61_transpose_x_0, transpose_y = attn_output_61_transpose_y_0, x = softmax_15_cast_fp16, y = value_61_cast_fp16)[name = string("attn_output_61_cast_fp16")]; tensor var_3456_perm_0 = const()[name = string("op_3456_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_304_axis_0 = const()[name = string("concat_304_axis_0"), val = int32(0)]; bool concat_304_interleave_0 = const()[name = string("concat_304_interleave_0"), val = bool(false)]; int32 gather_275_cast_uint16_to_int32 = cast(dtype = gather_275_cast_uint16_to_int32_dtype_0, x = gather_275_cast_uint16)[name = string("cast_617")]; tensor concat_304 = concat(axis = concat_304_axis_0, interleave = concat_304_interleave_0, values = (gather_274, gather_275_cast_uint16_to_int32, var_72))[name = string("concat_304")]; tensor var_3456_cast_fp16 = transpose(perm = var_3456_perm_0, x = attn_output_61_cast_fp16)[name = string("transpose_48")]; tensor var_3459_cast_fp16 = reshape(shape = concat_304, x = var_3456_cast_fp16)[name = string("op_3459_cast_fp16")]; tensor model_model_layers_15_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_15_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(791508160)))]; tensor linear_108_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_15_self_attn_o_proj_weight_to_fp16, x = var_3459_cast_fp16)[name = string("linear_108_cast_fp16")]; tensor hidden_states_725_cast_fp16 = add(x = hidden_states_689_cast_fp16, y = linear_108_cast_fp16)[name = string("hidden_states_725_cast_fp16")]; fp16 var_78_promoted_63_to_fp16 = const()[name = string("op_78_promoted_63_to_fp16"), val = fp16(0x1p+1)]; tensor var_3466_cast_fp16 = pow(x = hidden_states_725_cast_fp16, y = var_78_promoted_63_to_fp16)[name = string("op_3466_cast_fp16")]; tensor variance_127_axes_0 = const()[name = string("variance_127_axes_0"), val = tensor([-1])]; bool variance_127_keep_dims_0 = const()[name = string("variance_127_keep_dims_0"), val = bool(true)]; tensor variance_127_cast_fp16 = reduce_mean(axes = variance_127_axes_0, keep_dims = variance_127_keep_dims_0, x = var_3466_cast_fp16)[name = string("variance_127_cast_fp16")]; fp16 var_3469_to_fp16 = const()[name = string("op_3469_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3470_cast_fp16 = add(x = variance_127_cast_fp16, y = var_3469_to_fp16)[name = string("op_3470_cast_fp16")]; fp32 var_3471_epsilon_0 = const()[name = string("op_3471_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3471_cast_fp16 = rsqrt(epsilon = var_3471_epsilon_0, x = var_3470_cast_fp16)[name = string("op_3471_cast_fp16")]; tensor hidden_states_729_cast_fp16 = mul(x = hidden_states_725_cast_fp16, y = var_3471_cast_fp16)[name = string("hidden_states_729_cast_fp16")]; tensor model_model_layers_15_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_15_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(795702528)))]; tensor input_123_cast_fp16 = mul(x = model_model_layers_15_post_attention_layernorm_weight_to_fp16, y = hidden_states_729_cast_fp16)[name = string("input_123_cast_fp16")]; tensor model_model_layers_15_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_15_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(795704640)))]; tensor linear_109_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_15_mlp_gate_proj_weight_to_fp16, x = input_123_cast_fp16)[name = string("linear_109_cast_fp16")]; tensor var_3483_cast_fp16 = silu(x = linear_109_cast_fp16)[name = string("op_3483_cast_fp16")]; tensor model_model_layers_15_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_15_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(801996160)))]; tensor linear_110_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_15_mlp_up_proj_weight_to_fp16, x = input_123_cast_fp16)[name = string("linear_110_cast_fp16")]; tensor input_127_cast_fp16 = mul(x = var_3483_cast_fp16, y = linear_110_cast_fp16)[name = string("input_127_cast_fp16")]; tensor model_model_layers_15_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_15_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(808287680)))]; tensor linear_111_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_15_mlp_down_proj_weight_to_fp16, x = input_127_cast_fp16)[name = string("linear_111_cast_fp16")]; tensor hidden_states_735_cast_fp16 = add(x = hidden_states_725_cast_fp16, y = linear_111_cast_fp16)[name = string("hidden_states_735_cast_fp16")]; fp16 var_78_promoted_64_to_fp16 = const()[name = string("op_78_promoted_64_to_fp16"), val = fp16(0x1p+1)]; tensor var_3496_cast_fp16 = pow(x = hidden_states_735_cast_fp16, y = var_78_promoted_64_to_fp16)[name = string("op_3496_cast_fp16")]; tensor variance_129_axes_0 = const()[name = string("variance_129_axes_0"), val = tensor([-1])]; bool variance_129_keep_dims_0 = const()[name = string("variance_129_keep_dims_0"), val = bool(true)]; tensor variance_129_cast_fp16 = reduce_mean(axes = variance_129_axes_0, keep_dims = variance_129_keep_dims_0, x = var_3496_cast_fp16)[name = string("variance_129_cast_fp16")]; fp16 var_3499_to_fp16 = const()[name = string("op_3499_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3500_cast_fp16 = add(x = variance_129_cast_fp16, y = var_3499_to_fp16)[name = string("op_3500_cast_fp16")]; fp32 var_3501_epsilon_0 = const()[name = string("op_3501_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3501_cast_fp16 = rsqrt(epsilon = var_3501_epsilon_0, x = var_3500_cast_fp16)[name = string("op_3501_cast_fp16")]; tensor hidden_states_739_cast_fp16 = mul(x = hidden_states_735_cast_fp16, y = var_3501_cast_fp16)[name = string("hidden_states_739_cast_fp16")]; tensor model_model_layers_16_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814579200)))]; tensor hidden_states_743_cast_fp16 = mul(x = model_model_layers_16_input_layernorm_weight_to_fp16, y = hidden_states_739_cast_fp16)[name = string("hidden_states_743_cast_fp16")]; tensor var_3514_shape_cast_fp16 = shape(x = hidden_states_743_cast_fp16)[name = string("op_3514_shape_cast_fp16")]; int32 gather_292 = const()[name = string("gather_292"), val = int32(1)]; int32 gather_293_axis_0 = const()[name = string("gather_293_axis_0"), val = int32(0)]; int32 gather_293_batch_dims_0 = const()[name = string("gather_293_batch_dims_0"), val = int32(0)]; bool gather_293_validate_indices_0 = const()[name = string("gather_293_validate_indices_0"), val = bool(false)]; string var_3514_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3514_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_293_to_uint16 = const()[name = string("select_293_to_uint16"), val = uint16(1)]; tensor var_3514_shape_cast_fp16_to_uint16 = cast(dtype = var_3514_shape_cast_fp16_to_uint16_dtype_0, x = var_3514_shape_cast_fp16)[name = string("cast_610")]; uint16 gather_293_cast_uint16 = gather(axis = gather_293_axis_0, batch_dims = gather_293_batch_dims_0, indices = select_293_to_uint16, validate_indices = gather_293_validate_indices_0, x = var_3514_shape_cast_fp16_to_uint16)[name = string("gather_293_cast_uint16")]; string gather_293_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_293_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_16_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_16_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(814581312)))]; tensor linear_112_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_16_self_attn_q_proj_weight_to_fp16, x = hidden_states_743_cast_fp16)[name = string("linear_112_cast_fp16")]; tensor concat_305x = const()[name = string("concat_305x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_745_cast_fp16 = reshape(shape = concat_305x, x = linear_112_cast_fp16)[name = string("hidden_states_745_cast_fp16")]; fp16 var_78_promoted_65_to_fp16 = const()[name = string("op_78_promoted_65_to_fp16"), val = fp16(0x1p+1)]; tensor var_3522_cast_fp16 = pow(x = hidden_states_745_cast_fp16, y = var_78_promoted_65_to_fp16)[name = string("op_3522_cast_fp16")]; tensor variance_131_axes_0 = const()[name = string("variance_131_axes_0"), val = tensor([-1])]; bool variance_131_keep_dims_0 = const()[name = string("variance_131_keep_dims_0"), val = bool(true)]; tensor variance_131_cast_fp16 = reduce_mean(axes = variance_131_axes_0, keep_dims = variance_131_keep_dims_0, x = var_3522_cast_fp16)[name = string("variance_131_cast_fp16")]; fp16 var_3525_to_fp16 = const()[name = string("op_3525_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3526_cast_fp16 = add(x = variance_131_cast_fp16, y = var_3525_to_fp16)[name = string("op_3526_cast_fp16")]; fp32 var_3527_epsilon_0 = const()[name = string("op_3527_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3527_cast_fp16 = rsqrt(epsilon = var_3527_epsilon_0, x = var_3526_cast_fp16)[name = string("op_3527_cast_fp16")]; tensor hidden_states_749_cast_fp16 = mul(x = hidden_states_745_cast_fp16, y = var_3527_cast_fp16)[name = string("hidden_states_749_cast_fp16")]; tensor model_model_layers_16_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_16_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(818775680)))]; tensor var_3530_cast_fp16 = mul(x = model_model_layers_16_self_attn_q_norm_weight_to_fp16, y = hidden_states_749_cast_fp16)[name = string("op_3530_cast_fp16")]; tensor q_33_perm_0 = const()[name = string("q_33_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_16_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_16_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(818776000)))]; tensor linear_113_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_16_self_attn_k_proj_weight_to_fp16, x = hidden_states_743_cast_fp16)[name = string("linear_113_cast_fp16")]; tensor concat_306x = const()[name = string("concat_306x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_753_cast_fp16 = reshape(shape = concat_306x, x = linear_113_cast_fp16)[name = string("hidden_states_753_cast_fp16")]; fp16 var_78_promoted_66_to_fp16 = const()[name = string("op_78_promoted_66_to_fp16"), val = fp16(0x1p+1)]; tensor var_3538_cast_fp16 = pow(x = hidden_states_753_cast_fp16, y = var_78_promoted_66_to_fp16)[name = string("op_3538_cast_fp16")]; tensor variance_133_axes_0 = const()[name = string("variance_133_axes_0"), val = tensor([-1])]; bool variance_133_keep_dims_0 = const()[name = string("variance_133_keep_dims_0"), val = bool(true)]; tensor variance_133_cast_fp16 = reduce_mean(axes = variance_133_axes_0, keep_dims = variance_133_keep_dims_0, x = var_3538_cast_fp16)[name = string("variance_133_cast_fp16")]; fp16 var_3541_to_fp16 = const()[name = string("op_3541_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3542_cast_fp16 = add(x = variance_133_cast_fp16, y = var_3541_to_fp16)[name = string("op_3542_cast_fp16")]; fp32 var_3543_epsilon_0 = const()[name = string("op_3543_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3543_cast_fp16 = rsqrt(epsilon = var_3543_epsilon_0, x = var_3542_cast_fp16)[name = string("op_3543_cast_fp16")]; tensor hidden_states_757_cast_fp16 = mul(x = hidden_states_753_cast_fp16, y = var_3543_cast_fp16)[name = string("hidden_states_757_cast_fp16")]; tensor model_model_layers_16_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_16_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(820873216)))]; tensor var_3546_cast_fp16 = mul(x = model_model_layers_16_self_attn_k_norm_weight_to_fp16, y = hidden_states_757_cast_fp16)[name = string("op_3546_cast_fp16")]; tensor k_33_perm_0 = const()[name = string("k_33_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_16_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_16_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(820873536)))]; tensor linear_114_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_16_self_attn_v_proj_weight_to_fp16, x = hidden_states_743_cast_fp16)[name = string("linear_114_cast_fp16")]; tensor concat_307x = const()[name = string("concat_307x"), val = tensor([1, -1, 8, 128])]; tensor var_3551_cast_fp16 = reshape(shape = concat_307x, x = linear_114_cast_fp16)[name = string("op_3551_cast_fp16")]; tensor v_state_33_perm_0 = const()[name = string("v_state_33_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_33_cast_fp16 = transpose(perm = q_33_perm_0, x = var_3530_cast_fp16)[name = string("transpose_47")]; tensor var_3555_cast_fp16 = mul(x = q_33_cast_fp16, y = cos_5_cast_fp16)[name = string("op_3555_cast_fp16")]; tensor x1_65_begin_0 = const()[name = string("x1_65_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_65_end_0 = const()[name = string("x1_65_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_65_end_mask_0 = const()[name = string("x1_65_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_65_cast_fp16 = slice_by_index(begin = x1_65_begin_0, end = x1_65_end_0, end_mask = x1_65_end_mask_0, x = q_33_cast_fp16)[name = string("x1_65_cast_fp16")]; tensor x2_65_begin_0 = const()[name = string("x2_65_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_65_end_0 = const()[name = string("x2_65_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_65_end_mask_0 = const()[name = string("x2_65_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_65_cast_fp16 = slice_by_index(begin = x2_65_begin_0, end = x2_65_end_0, end_mask = x2_65_end_mask_0, x = q_33_cast_fp16)[name = string("x2_65_cast_fp16")]; fp16 const_36_promoted_to_fp16 = const()[name = string("const_36_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3566_cast_fp16 = mul(x = x2_65_cast_fp16, y = const_36_promoted_to_fp16)[name = string("op_3566_cast_fp16")]; bool var_3568_interleave_0 = const()[name = string("op_3568_interleave_0"), val = bool(false)]; tensor var_3568_cast_fp16 = concat(axis = var_72, interleave = var_3568_interleave_0, values = (var_3566_cast_fp16, x1_65_cast_fp16))[name = string("op_3568_cast_fp16")]; tensor var_3569_cast_fp16 = mul(x = var_3568_cast_fp16, y = sin_5_cast_fp16)[name = string("op_3569_cast_fp16")]; tensor query_65_cast_fp16 = add(x = var_3555_cast_fp16, y = var_3569_cast_fp16)[name = string("query_65_cast_fp16")]; tensor k_33_cast_fp16 = transpose(perm = k_33_perm_0, x = var_3546_cast_fp16)[name = string("transpose_46")]; tensor var_3571_cast_fp16 = mul(x = k_33_cast_fp16, y = cos_5_cast_fp16)[name = string("op_3571_cast_fp16")]; tensor x1_67_begin_0 = const()[name = string("x1_67_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_67_end_0 = const()[name = string("x1_67_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_67_end_mask_0 = const()[name = string("x1_67_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_67_cast_fp16 = slice_by_index(begin = x1_67_begin_0, end = x1_67_end_0, end_mask = x1_67_end_mask_0, x = k_33_cast_fp16)[name = string("x1_67_cast_fp16")]; tensor x2_67_begin_0 = const()[name = string("x2_67_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_67_end_0 = const()[name = string("x2_67_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_67_end_mask_0 = const()[name = string("x2_67_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_67_cast_fp16 = slice_by_index(begin = x2_67_begin_0, end = x2_67_end_0, end_mask = x2_67_end_mask_0, x = k_33_cast_fp16)[name = string("x2_67_cast_fp16")]; fp16 const_37_promoted_to_fp16 = const()[name = string("const_37_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3582_cast_fp16 = mul(x = x2_67_cast_fp16, y = const_37_promoted_to_fp16)[name = string("op_3582_cast_fp16")]; bool var_3584_interleave_0 = const()[name = string("op_3584_interleave_0"), val = bool(false)]; tensor var_3584_cast_fp16 = concat(axis = var_72, interleave = var_3584_interleave_0, values = (var_3582_cast_fp16, x1_67_cast_fp16))[name = string("op_3584_cast_fp16")]; tensor var_3585_cast_fp16 = mul(x = var_3584_cast_fp16, y = sin_5_cast_fp16)[name = string("op_3585_cast_fp16")]; tensor k_state_33_cast_fp16 = add(x = var_3571_cast_fp16, y = var_3585_cast_fp16)[name = string("k_state_33_cast_fp16")]; tensor expand_dims_192 = const()[name = string("expand_dims_192"), val = tensor([0])]; tensor expand_dims_193 = const()[name = string("expand_dims_193"), val = tensor([0])]; tensor expand_dims_195 = const()[name = string("expand_dims_195"), val = tensor([0])]; tensor concat_310_values0_0 = const()[name = string("concat_310_values0_0"), val = tensor([16])]; int32 concat_310_axis_0 = const()[name = string("concat_310_axis_0"), val = int32(0)]; bool concat_310_interleave_0 = const()[name = string("concat_310_interleave_0"), val = bool(false)]; tensor concat_310 = concat(axis = concat_310_axis_0, interleave = concat_310_interleave_0, values = (concat_310_values0_0, expand_dims_192, expand_dims_193, expand_dims_2, expand_dims_195))[name = string("concat_310")]; tensor key_cache_internal_tensor_assign_17_stride_0 = const()[name = string("key_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_310, begin_mask = key_cache_internal_tensor_assign_17_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_17_squeeze_mask_0, stride = key_cache_internal_tensor_assign_17_stride_0, update = k_state_33_cast_fp16, x = coreml_update_state_86)[name = string("key_cache_internal_tensor_assign_17_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_17_cast_fp16, input = key_cache)[name = string("coreml_update_state_88_write_state")]; tensor coreml_update_state_88 = read_state(input = key_cache)[name = string("coreml_update_state_88")]; tensor value_cache_internal_tensor_assign_17_stride_0 = const()[name = string("value_cache_internal_tensor_assign_17_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_17_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_17_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_17_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_17_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_33_cast_fp16 = transpose(perm = v_state_33_perm_0, x = var_3551_cast_fp16)[name = string("transpose_45")]; tensor value_cache_internal_tensor_assign_17_cast_fp16 = slice_update(begin = concat_310, begin_mask = value_cache_internal_tensor_assign_17_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_17_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_17_squeeze_mask_0, stride = value_cache_internal_tensor_assign_17_stride_0, update = v_state_33_cast_fp16, x = coreml_update_state_87)[name = string("value_cache_internal_tensor_assign_17_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_17_cast_fp16, input = value_cache)[name = string("coreml_update_state_89_write_state")]; tensor coreml_update_state_89 = read_state(input = value_cache)[name = string("coreml_update_state_89")]; tensor var_3608_begin_0 = const()[name = string("op_3608_begin_0"), val = tensor([16, 0, 0, 0, 0])]; tensor var_3608_end_0 = const()[name = string("op_3608_end_0"), val = tensor([17, 1, 8, 2048, 128])]; tensor var_3608_end_mask_0 = const()[name = string("op_3608_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3608_squeeze_mask_0 = const()[name = string("op_3608_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_3608_cast_fp16 = slice_by_index(begin = var_3608_begin_0, end = var_3608_end_0, end_mask = var_3608_end_mask_0, squeeze_mask = var_3608_squeeze_mask_0, x = coreml_update_state_88)[name = string("op_3608_cast_fp16")]; tensor var_3611_begin_0 = const()[name = string("op_3611_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3611_end_mask_0 = const()[name = string("op_3611_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3611_cast_fp16 = slice_by_index(begin = var_3611_begin_0, end = concat_12, end_mask = var_3611_end_mask_0, x = var_3608_cast_fp16)[name = string("op_3611_cast_fp16")]; tensor var_3613_begin_0 = const()[name = string("op_3613_begin_0"), val = tensor([16, 0, 0, 0, 0])]; tensor var_3613_end_0 = const()[name = string("op_3613_end_0"), val = tensor([17, 1, 8, 2048, 128])]; tensor var_3613_end_mask_0 = const()[name = string("op_3613_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3613_squeeze_mask_0 = const()[name = string("op_3613_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_3613_cast_fp16 = slice_by_index(begin = var_3613_begin_0, end = var_3613_end_0, end_mask = var_3613_end_mask_0, squeeze_mask = var_3613_squeeze_mask_0, x = coreml_update_state_89)[name = string("op_3613_cast_fp16")]; tensor var_3616_begin_0 = const()[name = string("op_3616_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3616_end_mask_0 = const()[name = string("op_3616_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3616_cast_fp16 = slice_by_index(begin = var_3616_begin_0, end = concat_12, end_mask = var_3616_end_mask_0, x = var_3613_cast_fp16)[name = string("op_3616_cast_fp16")]; tensor var_3618_shape_cast_fp16 = shape(x = var_3611_cast_fp16)[name = string("op_3618_shape_cast_fp16")]; int32 gather_301 = const()[name = string("gather_301"), val = int32(1)]; int32 gather_302 = const()[name = string("gather_302"), val = int32(8)]; int32 gather_303_axis_0 = const()[name = string("gather_303_axis_0"), val = int32(0)]; int32 gather_303_batch_dims_0 = const()[name = string("gather_303_batch_dims_0"), val = int32(0)]; bool gather_303_validate_indices_0 = const()[name = string("gather_303_validate_indices_0"), val = bool(false)]; string var_3618_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3618_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_303_to_uint16 = const()[name = string("select_303_to_uint16"), val = uint16(2)]; tensor var_3618_shape_cast_fp16_to_uint16 = cast(dtype = var_3618_shape_cast_fp16_to_uint16_dtype_0, x = var_3618_shape_cast_fp16)[name = string("cast_608")]; uint16 gather_303_cast_uint16 = gather(axis = gather_303_axis_0, batch_dims = gather_303_batch_dims_0, indices = select_303_to_uint16, validate_indices = gather_303_validate_indices_0, x = var_3618_shape_cast_fp16_to_uint16)[name = string("gather_303_cast_uint16")]; string gather_303_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_303_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_304 = const()[name = string("gather_304"), val = int32(128)]; tensor var_3625_axes_0 = const()[name = string("op_3625_axes_0"), val = tensor([2])]; tensor var_3625_cast_fp16 = expand_dims(axes = var_3625_axes_0, x = var_3611_cast_fp16)[name = string("op_3625_cast_fp16")]; int32 concat_318_axis_0 = const()[name = string("concat_318_axis_0"), val = int32(0)]; bool concat_318_interleave_0 = const()[name = string("concat_318_interleave_0"), val = bool(false)]; int32 gather_303_cast_uint16_to_int32 = cast(dtype = gather_303_cast_uint16_to_int32_dtype_0, x = gather_303_cast_uint16)[name = string("cast_607")]; tensor concat_318 = concat(axis = concat_318_axis_0, interleave = concat_318_interleave_0, values = (gather_301, gather_302, var_78, gather_303_cast_uint16_to_int32, gather_304))[name = string("concat_318")]; tensor shape_337_cast_fp16 = shape(x = var_3625_cast_fp16)[name = string("shape_337_cast_fp16")]; tensor real_div_32 = real_div(x = concat_318, y = shape_337_cast_fp16)[name = string("real_div_32")]; tensor hidden_states_763_cast_fp16 = tile(reps = real_div_32, x = var_3625_cast_fp16)[name = string("hidden_states_763_cast_fp16")]; tensor concat_319x = const()[name = string("concat_319x"), val = tensor([1, 16, -1, 128])]; tensor key_65_cast_fp16 = reshape(shape = concat_319x, x = hidden_states_763_cast_fp16)[name = string("key_65_cast_fp16")]; tensor var_3635_shape_cast_fp16 = shape(x = var_3616_cast_fp16)[name = string("op_3635_shape_cast_fp16")]; int32 gather_305 = const()[name = string("gather_305"), val = int32(1)]; int32 gather_306 = const()[name = string("gather_306"), val = int32(8)]; int32 gather_307_axis_0 = const()[name = string("gather_307_axis_0"), val = int32(0)]; int32 gather_307_batch_dims_0 = const()[name = string("gather_307_batch_dims_0"), val = int32(0)]; bool gather_307_validate_indices_0 = const()[name = string("gather_307_validate_indices_0"), val = bool(false)]; string var_3635_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3635_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_307_to_uint16 = const()[name = string("select_307_to_uint16"), val = uint16(2)]; tensor var_3635_shape_cast_fp16_to_uint16 = cast(dtype = var_3635_shape_cast_fp16_to_uint16_dtype_0, x = var_3635_shape_cast_fp16)[name = string("cast_606")]; uint16 gather_307_cast_uint16 = gather(axis = gather_307_axis_0, batch_dims = gather_307_batch_dims_0, indices = select_307_to_uint16, validate_indices = gather_307_validate_indices_0, x = var_3635_shape_cast_fp16_to_uint16)[name = string("gather_307_cast_uint16")]; string gather_307_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_307_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_308 = const()[name = string("gather_308"), val = int32(128)]; tensor var_3642_axes_0 = const()[name = string("op_3642_axes_0"), val = tensor([2])]; tensor var_3642_cast_fp16 = expand_dims(axes = var_3642_axes_0, x = var_3616_cast_fp16)[name = string("op_3642_cast_fp16")]; int32 concat_320_axis_0 = const()[name = string("concat_320_axis_0"), val = int32(0)]; bool concat_320_interleave_0 = const()[name = string("concat_320_interleave_0"), val = bool(false)]; int32 gather_307_cast_uint16_to_int32 = cast(dtype = gather_307_cast_uint16_to_int32_dtype_0, x = gather_307_cast_uint16)[name = string("cast_605")]; tensor concat_320 = concat(axis = concat_320_axis_0, interleave = concat_320_interleave_0, values = (gather_305, gather_306, var_78, gather_307_cast_uint16_to_int32, gather_308))[name = string("concat_320")]; tensor shape_342_cast_fp16 = shape(x = var_3642_cast_fp16)[name = string("shape_342_cast_fp16")]; tensor real_div_33 = real_div(x = concat_320, y = shape_342_cast_fp16)[name = string("real_div_33")]; tensor hidden_states_767_cast_fp16 = tile(reps = real_div_33, x = var_3642_cast_fp16)[name = string("hidden_states_767_cast_fp16")]; tensor concat_321x = const()[name = string("concat_321x"), val = tensor([1, 16, -1, 128])]; tensor value_65_cast_fp16 = reshape(shape = concat_321x, x = hidden_states_767_cast_fp16)[name = string("value_65_cast_fp16")]; tensor var_3652_shape_cast_fp16 = shape(x = key_65_cast_fp16)[name = string("op_3652_shape_cast_fp16")]; int32 gather_309_axis_0 = const()[name = string("gather_309_axis_0"), val = int32(0)]; int32 gather_309_batch_dims_0 = const()[name = string("gather_309_batch_dims_0"), val = int32(0)]; bool gather_309_validate_indices_0 = const()[name = string("gather_309_validate_indices_0"), val = bool(false)]; string var_3652_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3652_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_309_to_uint16 = const()[name = string("select_309_to_uint16"), val = uint16(2)]; tensor var_3652_shape_cast_fp16_to_uint16 = cast(dtype = var_3652_shape_cast_fp16_to_uint16_dtype_0, x = var_3652_shape_cast_fp16)[name = string("cast_604")]; uint16 gather_309_cast_uint16 = gather(axis = gather_309_axis_0, batch_dims = gather_309_batch_dims_0, indices = select_309_to_uint16, validate_indices = gather_309_validate_indices_0, x = var_3652_shape_cast_fp16_to_uint16)[name = string("gather_309_cast_uint16")]; string gather_309_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_309_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_322_values0_0 = const()[name = string("concat_322_values0_0"), val = int32(1)]; int32 concat_322_values1_0 = const()[name = string("concat_322_values1_0"), val = int32(1)]; int32 concat_322_values2_0 = const()[name = string("concat_322_values2_0"), val = int32(0)]; int32 concat_322_axis_0 = const()[name = string("concat_322_axis_0"), val = int32(0)]; bool concat_322_interleave_0 = const()[name = string("concat_322_interleave_0"), val = bool(false)]; int32 gather_309_cast_uint16_to_int32 = cast(dtype = gather_309_cast_uint16_to_int32_dtype_0, x = gather_309_cast_uint16)[name = string("cast_603")]; tensor concat_322 = concat(axis = concat_322_axis_0, interleave = concat_322_interleave_0, values = (concat_322_values0_0, concat_322_values1_0, concat_322_values2_0, gather_309_cast_uint16_to_int32))[name = string("concat_322")]; tensor attention_mask_33_begin_0 = const()[name = string("attention_mask_33_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_33_end_mask_0 = const()[name = string("attention_mask_33_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_33_cast_fp16 = slice_by_index(begin = attention_mask_33_begin_0, end = concat_322, end_mask = attention_mask_33_end_mask_0, x = causal_mask)[name = string("attention_mask_33_cast_fp16")]; tensor mul_16_cast_fp16 = mul(x = query_65_cast_fp16, y = var_85_to_fp16)[name = string("mul_16_cast_fp16")]; bool matmul_16_transpose_y_0 = const()[name = string("matmul_16_transpose_y_0"), val = bool(true)]; bool matmul_16_transpose_x_0 = const()[name = string("matmul_16_transpose_x_0"), val = bool(false)]; tensor matmul_16_cast_fp16 = matmul(transpose_x = matmul_16_transpose_x_0, transpose_y = matmul_16_transpose_y_0, x = mul_16_cast_fp16, y = key_65_cast_fp16)[name = string("matmul_16_cast_fp16")]; tensor add_326_cast_fp16 = add(x = matmul_16_cast_fp16, y = attention_mask_33_cast_fp16)[name = string("add_326_cast_fp16")]; int32 softmax_16_axis_0 = const()[name = string("softmax_16_axis_0"), val = int32(-1)]; tensor softmax_16_cast_fp16 = softmax(axis = softmax_16_axis_0, x = add_326_cast_fp16)[name = string("softmax_16_cast_fp16")]; bool attn_output_65_transpose_x_0 = const()[name = string("attn_output_65_transpose_x_0"), val = bool(false)]; bool attn_output_65_transpose_y_0 = const()[name = string("attn_output_65_transpose_y_0"), val = bool(false)]; tensor attn_output_65_cast_fp16 = matmul(transpose_x = attn_output_65_transpose_x_0, transpose_y = attn_output_65_transpose_y_0, x = softmax_16_cast_fp16, y = value_65_cast_fp16)[name = string("attn_output_65_cast_fp16")]; tensor var_3661_perm_0 = const()[name = string("op_3661_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_323_axis_0 = const()[name = string("concat_323_axis_0"), val = int32(0)]; bool concat_323_interleave_0 = const()[name = string("concat_323_interleave_0"), val = bool(false)]; int32 gather_293_cast_uint16_to_int32 = cast(dtype = gather_293_cast_uint16_to_int32_dtype_0, x = gather_293_cast_uint16)[name = string("cast_609")]; tensor concat_323 = concat(axis = concat_323_axis_0, interleave = concat_323_interleave_0, values = (gather_292, gather_293_cast_uint16_to_int32, var_72))[name = string("concat_323")]; tensor var_3661_cast_fp16 = transpose(perm = var_3661_perm_0, x = attn_output_65_cast_fp16)[name = string("transpose_44")]; tensor var_3664_cast_fp16 = reshape(shape = concat_323, x = var_3661_cast_fp16)[name = string("op_3664_cast_fp16")]; tensor model_model_layers_16_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_16_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(822970752)))]; tensor linear_115_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_16_self_attn_o_proj_weight_to_fp16, x = var_3664_cast_fp16)[name = string("linear_115_cast_fp16")]; tensor hidden_states_771_cast_fp16 = add(x = hidden_states_735_cast_fp16, y = linear_115_cast_fp16)[name = string("hidden_states_771_cast_fp16")]; fp16 var_78_promoted_67_to_fp16 = const()[name = string("op_78_promoted_67_to_fp16"), val = fp16(0x1p+1)]; tensor var_3671_cast_fp16 = pow(x = hidden_states_771_cast_fp16, y = var_78_promoted_67_to_fp16)[name = string("op_3671_cast_fp16")]; tensor variance_135_axes_0 = const()[name = string("variance_135_axes_0"), val = tensor([-1])]; bool variance_135_keep_dims_0 = const()[name = string("variance_135_keep_dims_0"), val = bool(true)]; tensor variance_135_cast_fp16 = reduce_mean(axes = variance_135_axes_0, keep_dims = variance_135_keep_dims_0, x = var_3671_cast_fp16)[name = string("variance_135_cast_fp16")]; fp16 var_3674_to_fp16 = const()[name = string("op_3674_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3675_cast_fp16 = add(x = variance_135_cast_fp16, y = var_3674_to_fp16)[name = string("op_3675_cast_fp16")]; fp32 var_3676_epsilon_0 = const()[name = string("op_3676_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3676_cast_fp16 = rsqrt(epsilon = var_3676_epsilon_0, x = var_3675_cast_fp16)[name = string("op_3676_cast_fp16")]; tensor hidden_states_775_cast_fp16 = mul(x = hidden_states_771_cast_fp16, y = var_3676_cast_fp16)[name = string("hidden_states_775_cast_fp16")]; tensor model_model_layers_16_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_16_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(827165120)))]; tensor input_131_cast_fp16 = mul(x = model_model_layers_16_post_attention_layernorm_weight_to_fp16, y = hidden_states_775_cast_fp16)[name = string("input_131_cast_fp16")]; tensor model_model_layers_16_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_16_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(827167232)))]; tensor linear_116_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_16_mlp_gate_proj_weight_to_fp16, x = input_131_cast_fp16)[name = string("linear_116_cast_fp16")]; tensor var_3688_cast_fp16 = silu(x = linear_116_cast_fp16)[name = string("op_3688_cast_fp16")]; tensor model_model_layers_16_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_16_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(833458752)))]; tensor linear_117_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_16_mlp_up_proj_weight_to_fp16, x = input_131_cast_fp16)[name = string("linear_117_cast_fp16")]; tensor input_135_cast_fp16 = mul(x = var_3688_cast_fp16, y = linear_117_cast_fp16)[name = string("input_135_cast_fp16")]; tensor model_model_layers_16_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_16_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(839750272)))]; tensor linear_118_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_16_mlp_down_proj_weight_to_fp16, x = input_135_cast_fp16)[name = string("linear_118_cast_fp16")]; tensor hidden_states_781_cast_fp16 = add(x = hidden_states_771_cast_fp16, y = linear_118_cast_fp16)[name = string("hidden_states_781_cast_fp16")]; fp16 var_78_promoted_68_to_fp16 = const()[name = string("op_78_promoted_68_to_fp16"), val = fp16(0x1p+1)]; tensor var_3701_cast_fp16 = pow(x = hidden_states_781_cast_fp16, y = var_78_promoted_68_to_fp16)[name = string("op_3701_cast_fp16")]; tensor variance_137_axes_0 = const()[name = string("variance_137_axes_0"), val = tensor([-1])]; bool variance_137_keep_dims_0 = const()[name = string("variance_137_keep_dims_0"), val = bool(true)]; tensor variance_137_cast_fp16 = reduce_mean(axes = variance_137_axes_0, keep_dims = variance_137_keep_dims_0, x = var_3701_cast_fp16)[name = string("variance_137_cast_fp16")]; fp16 var_3704_to_fp16 = const()[name = string("op_3704_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3705_cast_fp16 = add(x = variance_137_cast_fp16, y = var_3704_to_fp16)[name = string("op_3705_cast_fp16")]; fp32 var_3706_epsilon_0 = const()[name = string("op_3706_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3706_cast_fp16 = rsqrt(epsilon = var_3706_epsilon_0, x = var_3705_cast_fp16)[name = string("op_3706_cast_fp16")]; tensor hidden_states_785_cast_fp16 = mul(x = hidden_states_781_cast_fp16, y = var_3706_cast_fp16)[name = string("hidden_states_785_cast_fp16")]; tensor model_model_layers_17_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846041792)))]; tensor hidden_states_789_cast_fp16 = mul(x = model_model_layers_17_input_layernorm_weight_to_fp16, y = hidden_states_785_cast_fp16)[name = string("hidden_states_789_cast_fp16")]; tensor var_3719_shape_cast_fp16 = shape(x = hidden_states_789_cast_fp16)[name = string("op_3719_shape_cast_fp16")]; int32 gather_310 = const()[name = string("gather_310"), val = int32(1)]; int32 gather_311_axis_0 = const()[name = string("gather_311_axis_0"), val = int32(0)]; int32 gather_311_batch_dims_0 = const()[name = string("gather_311_batch_dims_0"), val = int32(0)]; bool gather_311_validate_indices_0 = const()[name = string("gather_311_validate_indices_0"), val = bool(false)]; string var_3719_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3719_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_311_to_uint16 = const()[name = string("select_311_to_uint16"), val = uint16(1)]; tensor var_3719_shape_cast_fp16_to_uint16 = cast(dtype = var_3719_shape_cast_fp16_to_uint16_dtype_0, x = var_3719_shape_cast_fp16)[name = string("cast_602")]; uint16 gather_311_cast_uint16 = gather(axis = gather_311_axis_0, batch_dims = gather_311_batch_dims_0, indices = select_311_to_uint16, validate_indices = gather_311_validate_indices_0, x = var_3719_shape_cast_fp16_to_uint16)[name = string("gather_311_cast_uint16")]; string gather_311_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_311_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_17_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_17_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(846043904)))]; tensor linear_119_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_17_self_attn_q_proj_weight_to_fp16, x = hidden_states_789_cast_fp16)[name = string("linear_119_cast_fp16")]; tensor concat_324x = const()[name = string("concat_324x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_791_cast_fp16 = reshape(shape = concat_324x, x = linear_119_cast_fp16)[name = string("hidden_states_791_cast_fp16")]; fp16 var_78_promoted_69_to_fp16 = const()[name = string("op_78_promoted_69_to_fp16"), val = fp16(0x1p+1)]; tensor var_3727_cast_fp16 = pow(x = hidden_states_791_cast_fp16, y = var_78_promoted_69_to_fp16)[name = string("op_3727_cast_fp16")]; tensor variance_139_axes_0 = const()[name = string("variance_139_axes_0"), val = tensor([-1])]; bool variance_139_keep_dims_0 = const()[name = string("variance_139_keep_dims_0"), val = bool(true)]; tensor variance_139_cast_fp16 = reduce_mean(axes = variance_139_axes_0, keep_dims = variance_139_keep_dims_0, x = var_3727_cast_fp16)[name = string("variance_139_cast_fp16")]; fp16 var_3730_to_fp16 = const()[name = string("op_3730_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3731_cast_fp16 = add(x = variance_139_cast_fp16, y = var_3730_to_fp16)[name = string("op_3731_cast_fp16")]; fp32 var_3732_epsilon_0 = const()[name = string("op_3732_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3732_cast_fp16 = rsqrt(epsilon = var_3732_epsilon_0, x = var_3731_cast_fp16)[name = string("op_3732_cast_fp16")]; tensor hidden_states_795_cast_fp16 = mul(x = hidden_states_791_cast_fp16, y = var_3732_cast_fp16)[name = string("hidden_states_795_cast_fp16")]; tensor model_model_layers_17_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_17_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(850238272)))]; tensor var_3735_cast_fp16 = mul(x = model_model_layers_17_self_attn_q_norm_weight_to_fp16, y = hidden_states_795_cast_fp16)[name = string("op_3735_cast_fp16")]; tensor q_35_perm_0 = const()[name = string("q_35_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_17_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_17_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(850238592)))]; tensor linear_120_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_17_self_attn_k_proj_weight_to_fp16, x = hidden_states_789_cast_fp16)[name = string("linear_120_cast_fp16")]; tensor concat_325x = const()[name = string("concat_325x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_799_cast_fp16 = reshape(shape = concat_325x, x = linear_120_cast_fp16)[name = string("hidden_states_799_cast_fp16")]; fp16 var_78_promoted_70_to_fp16 = const()[name = string("op_78_promoted_70_to_fp16"), val = fp16(0x1p+1)]; tensor var_3743_cast_fp16 = pow(x = hidden_states_799_cast_fp16, y = var_78_promoted_70_to_fp16)[name = string("op_3743_cast_fp16")]; tensor variance_141_axes_0 = const()[name = string("variance_141_axes_0"), val = tensor([-1])]; bool variance_141_keep_dims_0 = const()[name = string("variance_141_keep_dims_0"), val = bool(true)]; tensor variance_141_cast_fp16 = reduce_mean(axes = variance_141_axes_0, keep_dims = variance_141_keep_dims_0, x = var_3743_cast_fp16)[name = string("variance_141_cast_fp16")]; fp16 var_3746_to_fp16 = const()[name = string("op_3746_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3747_cast_fp16 = add(x = variance_141_cast_fp16, y = var_3746_to_fp16)[name = string("op_3747_cast_fp16")]; fp32 var_3748_epsilon_0 = const()[name = string("op_3748_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3748_cast_fp16 = rsqrt(epsilon = var_3748_epsilon_0, x = var_3747_cast_fp16)[name = string("op_3748_cast_fp16")]; tensor hidden_states_803_cast_fp16 = mul(x = hidden_states_799_cast_fp16, y = var_3748_cast_fp16)[name = string("hidden_states_803_cast_fp16")]; tensor model_model_layers_17_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_17_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852335808)))]; tensor var_3751_cast_fp16 = mul(x = model_model_layers_17_self_attn_k_norm_weight_to_fp16, y = hidden_states_803_cast_fp16)[name = string("op_3751_cast_fp16")]; tensor k_35_perm_0 = const()[name = string("k_35_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_17_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_17_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(852336128)))]; tensor linear_121_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_17_self_attn_v_proj_weight_to_fp16, x = hidden_states_789_cast_fp16)[name = string("linear_121_cast_fp16")]; tensor concat_326x = const()[name = string("concat_326x"), val = tensor([1, -1, 8, 128])]; tensor var_3756_cast_fp16 = reshape(shape = concat_326x, x = linear_121_cast_fp16)[name = string("op_3756_cast_fp16")]; tensor v_state_35_perm_0 = const()[name = string("v_state_35_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_35_cast_fp16 = transpose(perm = q_35_perm_0, x = var_3735_cast_fp16)[name = string("transpose_43")]; tensor var_3760_cast_fp16 = mul(x = q_35_cast_fp16, y = cos_5_cast_fp16)[name = string("op_3760_cast_fp16")]; tensor x1_69_begin_0 = const()[name = string("x1_69_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_69_end_0 = const()[name = string("x1_69_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_69_end_mask_0 = const()[name = string("x1_69_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_69_cast_fp16 = slice_by_index(begin = x1_69_begin_0, end = x1_69_end_0, end_mask = x1_69_end_mask_0, x = q_35_cast_fp16)[name = string("x1_69_cast_fp16")]; tensor x2_69_begin_0 = const()[name = string("x2_69_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_69_end_0 = const()[name = string("x2_69_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_69_end_mask_0 = const()[name = string("x2_69_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_69_cast_fp16 = slice_by_index(begin = x2_69_begin_0, end = x2_69_end_0, end_mask = x2_69_end_mask_0, x = q_35_cast_fp16)[name = string("x2_69_cast_fp16")]; fp16 const_38_promoted_to_fp16 = const()[name = string("const_38_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3771_cast_fp16 = mul(x = x2_69_cast_fp16, y = const_38_promoted_to_fp16)[name = string("op_3771_cast_fp16")]; bool var_3773_interleave_0 = const()[name = string("op_3773_interleave_0"), val = bool(false)]; tensor var_3773_cast_fp16 = concat(axis = var_72, interleave = var_3773_interleave_0, values = (var_3771_cast_fp16, x1_69_cast_fp16))[name = string("op_3773_cast_fp16")]; tensor var_3774_cast_fp16 = mul(x = var_3773_cast_fp16, y = sin_5_cast_fp16)[name = string("op_3774_cast_fp16")]; tensor query_69_cast_fp16 = add(x = var_3760_cast_fp16, y = var_3774_cast_fp16)[name = string("query_69_cast_fp16")]; tensor k_35_cast_fp16 = transpose(perm = k_35_perm_0, x = var_3751_cast_fp16)[name = string("transpose_42")]; tensor var_3776_cast_fp16 = mul(x = k_35_cast_fp16, y = cos_5_cast_fp16)[name = string("op_3776_cast_fp16")]; tensor x1_71_begin_0 = const()[name = string("x1_71_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_71_end_0 = const()[name = string("x1_71_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_71_end_mask_0 = const()[name = string("x1_71_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_71_cast_fp16 = slice_by_index(begin = x1_71_begin_0, end = x1_71_end_0, end_mask = x1_71_end_mask_0, x = k_35_cast_fp16)[name = string("x1_71_cast_fp16")]; tensor x2_71_begin_0 = const()[name = string("x2_71_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_71_end_0 = const()[name = string("x2_71_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_71_end_mask_0 = const()[name = string("x2_71_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_71_cast_fp16 = slice_by_index(begin = x2_71_begin_0, end = x2_71_end_0, end_mask = x2_71_end_mask_0, x = k_35_cast_fp16)[name = string("x2_71_cast_fp16")]; fp16 const_39_promoted_to_fp16 = const()[name = string("const_39_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3787_cast_fp16 = mul(x = x2_71_cast_fp16, y = const_39_promoted_to_fp16)[name = string("op_3787_cast_fp16")]; bool var_3789_interleave_0 = const()[name = string("op_3789_interleave_0"), val = bool(false)]; tensor var_3789_cast_fp16 = concat(axis = var_72, interleave = var_3789_interleave_0, values = (var_3787_cast_fp16, x1_71_cast_fp16))[name = string("op_3789_cast_fp16")]; tensor var_3790_cast_fp16 = mul(x = var_3789_cast_fp16, y = sin_5_cast_fp16)[name = string("op_3790_cast_fp16")]; tensor k_state_35_cast_fp16 = add(x = var_3776_cast_fp16, y = var_3790_cast_fp16)[name = string("k_state_35_cast_fp16")]; tensor expand_dims_204 = const()[name = string("expand_dims_204"), val = tensor([0])]; tensor expand_dims_205 = const()[name = string("expand_dims_205"), val = tensor([0])]; tensor expand_dims_207 = const()[name = string("expand_dims_207"), val = tensor([0])]; tensor concat_329_values0_0 = const()[name = string("concat_329_values0_0"), val = tensor([17])]; int32 concat_329_axis_0 = const()[name = string("concat_329_axis_0"), val = int32(0)]; bool concat_329_interleave_0 = const()[name = string("concat_329_interleave_0"), val = bool(false)]; tensor concat_329 = concat(axis = concat_329_axis_0, interleave = concat_329_interleave_0, values = (concat_329_values0_0, expand_dims_204, expand_dims_205, expand_dims_2, expand_dims_207))[name = string("concat_329")]; tensor key_cache_internal_tensor_assign_18_stride_0 = const()[name = string("key_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_329, begin_mask = key_cache_internal_tensor_assign_18_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_18_squeeze_mask_0, stride = key_cache_internal_tensor_assign_18_stride_0, update = k_state_35_cast_fp16, x = coreml_update_state_88)[name = string("key_cache_internal_tensor_assign_18_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_18_cast_fp16, input = key_cache)[name = string("coreml_update_state_90_write_state")]; tensor coreml_update_state_90 = read_state(input = key_cache)[name = string("coreml_update_state_90")]; tensor value_cache_internal_tensor_assign_18_stride_0 = const()[name = string("value_cache_internal_tensor_assign_18_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_18_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_18_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_18_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_18_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_35_cast_fp16 = transpose(perm = v_state_35_perm_0, x = var_3756_cast_fp16)[name = string("transpose_41")]; tensor value_cache_internal_tensor_assign_18_cast_fp16 = slice_update(begin = concat_329, begin_mask = value_cache_internal_tensor_assign_18_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_18_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_18_squeeze_mask_0, stride = value_cache_internal_tensor_assign_18_stride_0, update = v_state_35_cast_fp16, x = coreml_update_state_89)[name = string("value_cache_internal_tensor_assign_18_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_18_cast_fp16, input = value_cache)[name = string("coreml_update_state_91_write_state")]; tensor coreml_update_state_91 = read_state(input = value_cache)[name = string("coreml_update_state_91")]; tensor var_3813_begin_0 = const()[name = string("op_3813_begin_0"), val = tensor([17, 0, 0, 0, 0])]; tensor var_3813_end_0 = const()[name = string("op_3813_end_0"), val = tensor([18, 1, 8, 2048, 128])]; tensor var_3813_end_mask_0 = const()[name = string("op_3813_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3813_squeeze_mask_0 = const()[name = string("op_3813_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_3813_cast_fp16 = slice_by_index(begin = var_3813_begin_0, end = var_3813_end_0, end_mask = var_3813_end_mask_0, squeeze_mask = var_3813_squeeze_mask_0, x = coreml_update_state_90)[name = string("op_3813_cast_fp16")]; tensor var_3816_begin_0 = const()[name = string("op_3816_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3816_end_mask_0 = const()[name = string("op_3816_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3816_cast_fp16 = slice_by_index(begin = var_3816_begin_0, end = concat_12, end_mask = var_3816_end_mask_0, x = var_3813_cast_fp16)[name = string("op_3816_cast_fp16")]; tensor var_3818_begin_0 = const()[name = string("op_3818_begin_0"), val = tensor([17, 0, 0, 0, 0])]; tensor var_3818_end_0 = const()[name = string("op_3818_end_0"), val = tensor([18, 1, 8, 2048, 128])]; tensor var_3818_end_mask_0 = const()[name = string("op_3818_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_3818_squeeze_mask_0 = const()[name = string("op_3818_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_3818_cast_fp16 = slice_by_index(begin = var_3818_begin_0, end = var_3818_end_0, end_mask = var_3818_end_mask_0, squeeze_mask = var_3818_squeeze_mask_0, x = coreml_update_state_91)[name = string("op_3818_cast_fp16")]; tensor var_3821_begin_0 = const()[name = string("op_3821_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_3821_end_mask_0 = const()[name = string("op_3821_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_3821_cast_fp16 = slice_by_index(begin = var_3821_begin_0, end = concat_12, end_mask = var_3821_end_mask_0, x = var_3818_cast_fp16)[name = string("op_3821_cast_fp16")]; tensor var_3823_shape_cast_fp16 = shape(x = var_3816_cast_fp16)[name = string("op_3823_shape_cast_fp16")]; int32 gather_319 = const()[name = string("gather_319"), val = int32(1)]; int32 gather_320 = const()[name = string("gather_320"), val = int32(8)]; int32 gather_321_axis_0 = const()[name = string("gather_321_axis_0"), val = int32(0)]; int32 gather_321_batch_dims_0 = const()[name = string("gather_321_batch_dims_0"), val = int32(0)]; bool gather_321_validate_indices_0 = const()[name = string("gather_321_validate_indices_0"), val = bool(false)]; string var_3823_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3823_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_321_to_uint16 = const()[name = string("select_321_to_uint16"), val = uint16(2)]; tensor var_3823_shape_cast_fp16_to_uint16 = cast(dtype = var_3823_shape_cast_fp16_to_uint16_dtype_0, x = var_3823_shape_cast_fp16)[name = string("cast_600")]; uint16 gather_321_cast_uint16 = gather(axis = gather_321_axis_0, batch_dims = gather_321_batch_dims_0, indices = select_321_to_uint16, validate_indices = gather_321_validate_indices_0, x = var_3823_shape_cast_fp16_to_uint16)[name = string("gather_321_cast_uint16")]; string gather_321_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_321_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_322 = const()[name = string("gather_322"), val = int32(128)]; tensor var_3830_axes_0 = const()[name = string("op_3830_axes_0"), val = tensor([2])]; tensor var_3830_cast_fp16 = expand_dims(axes = var_3830_axes_0, x = var_3816_cast_fp16)[name = string("op_3830_cast_fp16")]; int32 concat_337_axis_0 = const()[name = string("concat_337_axis_0"), val = int32(0)]; bool concat_337_interleave_0 = const()[name = string("concat_337_interleave_0"), val = bool(false)]; int32 gather_321_cast_uint16_to_int32 = cast(dtype = gather_321_cast_uint16_to_int32_dtype_0, x = gather_321_cast_uint16)[name = string("cast_599")]; tensor concat_337 = concat(axis = concat_337_axis_0, interleave = concat_337_interleave_0, values = (gather_319, gather_320, var_78, gather_321_cast_uint16_to_int32, gather_322))[name = string("concat_337")]; tensor shape_357_cast_fp16 = shape(x = var_3830_cast_fp16)[name = string("shape_357_cast_fp16")]; tensor real_div_34 = real_div(x = concat_337, y = shape_357_cast_fp16)[name = string("real_div_34")]; tensor hidden_states_809_cast_fp16 = tile(reps = real_div_34, x = var_3830_cast_fp16)[name = string("hidden_states_809_cast_fp16")]; tensor concat_338x = const()[name = string("concat_338x"), val = tensor([1, 16, -1, 128])]; tensor key_69_cast_fp16 = reshape(shape = concat_338x, x = hidden_states_809_cast_fp16)[name = string("key_69_cast_fp16")]; tensor var_3840_shape_cast_fp16 = shape(x = var_3821_cast_fp16)[name = string("op_3840_shape_cast_fp16")]; int32 gather_323 = const()[name = string("gather_323"), val = int32(1)]; int32 gather_324 = const()[name = string("gather_324"), val = int32(8)]; int32 gather_325_axis_0 = const()[name = string("gather_325_axis_0"), val = int32(0)]; int32 gather_325_batch_dims_0 = const()[name = string("gather_325_batch_dims_0"), val = int32(0)]; bool gather_325_validate_indices_0 = const()[name = string("gather_325_validate_indices_0"), val = bool(false)]; string var_3840_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3840_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_325_to_uint16 = const()[name = string("select_325_to_uint16"), val = uint16(2)]; tensor var_3840_shape_cast_fp16_to_uint16 = cast(dtype = var_3840_shape_cast_fp16_to_uint16_dtype_0, x = var_3840_shape_cast_fp16)[name = string("cast_598")]; uint16 gather_325_cast_uint16 = gather(axis = gather_325_axis_0, batch_dims = gather_325_batch_dims_0, indices = select_325_to_uint16, validate_indices = gather_325_validate_indices_0, x = var_3840_shape_cast_fp16_to_uint16)[name = string("gather_325_cast_uint16")]; string gather_325_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_325_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_326 = const()[name = string("gather_326"), val = int32(128)]; tensor var_3847_axes_0 = const()[name = string("op_3847_axes_0"), val = tensor([2])]; tensor var_3847_cast_fp16 = expand_dims(axes = var_3847_axes_0, x = var_3821_cast_fp16)[name = string("op_3847_cast_fp16")]; int32 concat_339_axis_0 = const()[name = string("concat_339_axis_0"), val = int32(0)]; bool concat_339_interleave_0 = const()[name = string("concat_339_interleave_0"), val = bool(false)]; int32 gather_325_cast_uint16_to_int32 = cast(dtype = gather_325_cast_uint16_to_int32_dtype_0, x = gather_325_cast_uint16)[name = string("cast_597")]; tensor concat_339 = concat(axis = concat_339_axis_0, interleave = concat_339_interleave_0, values = (gather_323, gather_324, var_78, gather_325_cast_uint16_to_int32, gather_326))[name = string("concat_339")]; tensor shape_362_cast_fp16 = shape(x = var_3847_cast_fp16)[name = string("shape_362_cast_fp16")]; tensor real_div_35 = real_div(x = concat_339, y = shape_362_cast_fp16)[name = string("real_div_35")]; tensor hidden_states_813_cast_fp16 = tile(reps = real_div_35, x = var_3847_cast_fp16)[name = string("hidden_states_813_cast_fp16")]; tensor concat_340x = const()[name = string("concat_340x"), val = tensor([1, 16, -1, 128])]; tensor value_69_cast_fp16 = reshape(shape = concat_340x, x = hidden_states_813_cast_fp16)[name = string("value_69_cast_fp16")]; tensor var_3857_shape_cast_fp16 = shape(x = key_69_cast_fp16)[name = string("op_3857_shape_cast_fp16")]; int32 gather_327_axis_0 = const()[name = string("gather_327_axis_0"), val = int32(0)]; int32 gather_327_batch_dims_0 = const()[name = string("gather_327_batch_dims_0"), val = int32(0)]; bool gather_327_validate_indices_0 = const()[name = string("gather_327_validate_indices_0"), val = bool(false)]; string var_3857_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3857_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_327_to_uint16 = const()[name = string("select_327_to_uint16"), val = uint16(2)]; tensor var_3857_shape_cast_fp16_to_uint16 = cast(dtype = var_3857_shape_cast_fp16_to_uint16_dtype_0, x = var_3857_shape_cast_fp16)[name = string("cast_596")]; uint16 gather_327_cast_uint16 = gather(axis = gather_327_axis_0, batch_dims = gather_327_batch_dims_0, indices = select_327_to_uint16, validate_indices = gather_327_validate_indices_0, x = var_3857_shape_cast_fp16_to_uint16)[name = string("gather_327_cast_uint16")]; string gather_327_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_327_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_341_values0_0 = const()[name = string("concat_341_values0_0"), val = int32(1)]; int32 concat_341_values1_0 = const()[name = string("concat_341_values1_0"), val = int32(1)]; int32 concat_341_values2_0 = const()[name = string("concat_341_values2_0"), val = int32(0)]; int32 concat_341_axis_0 = const()[name = string("concat_341_axis_0"), val = int32(0)]; bool concat_341_interleave_0 = const()[name = string("concat_341_interleave_0"), val = bool(false)]; int32 gather_327_cast_uint16_to_int32 = cast(dtype = gather_327_cast_uint16_to_int32_dtype_0, x = gather_327_cast_uint16)[name = string("cast_595")]; tensor concat_341 = concat(axis = concat_341_axis_0, interleave = concat_341_interleave_0, values = (concat_341_values0_0, concat_341_values1_0, concat_341_values2_0, gather_327_cast_uint16_to_int32))[name = string("concat_341")]; tensor attention_mask_35_begin_0 = const()[name = string("attention_mask_35_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_35_end_mask_0 = const()[name = string("attention_mask_35_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_35_cast_fp16 = slice_by_index(begin = attention_mask_35_begin_0, end = concat_341, end_mask = attention_mask_35_end_mask_0, x = causal_mask)[name = string("attention_mask_35_cast_fp16")]; tensor mul_17_cast_fp16 = mul(x = query_69_cast_fp16, y = var_85_to_fp16)[name = string("mul_17_cast_fp16")]; bool matmul_17_transpose_y_0 = const()[name = string("matmul_17_transpose_y_0"), val = bool(true)]; bool matmul_17_transpose_x_0 = const()[name = string("matmul_17_transpose_x_0"), val = bool(false)]; tensor matmul_17_cast_fp16 = matmul(transpose_x = matmul_17_transpose_x_0, transpose_y = matmul_17_transpose_y_0, x = mul_17_cast_fp16, y = key_69_cast_fp16)[name = string("matmul_17_cast_fp16")]; tensor add_345_cast_fp16 = add(x = matmul_17_cast_fp16, y = attention_mask_35_cast_fp16)[name = string("add_345_cast_fp16")]; int32 softmax_17_axis_0 = const()[name = string("softmax_17_axis_0"), val = int32(-1)]; tensor softmax_17_cast_fp16 = softmax(axis = softmax_17_axis_0, x = add_345_cast_fp16)[name = string("softmax_17_cast_fp16")]; bool attn_output_69_transpose_x_0 = const()[name = string("attn_output_69_transpose_x_0"), val = bool(false)]; bool attn_output_69_transpose_y_0 = const()[name = string("attn_output_69_transpose_y_0"), val = bool(false)]; tensor attn_output_69_cast_fp16 = matmul(transpose_x = attn_output_69_transpose_x_0, transpose_y = attn_output_69_transpose_y_0, x = softmax_17_cast_fp16, y = value_69_cast_fp16)[name = string("attn_output_69_cast_fp16")]; tensor var_3866_perm_0 = const()[name = string("op_3866_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_342_axis_0 = const()[name = string("concat_342_axis_0"), val = int32(0)]; bool concat_342_interleave_0 = const()[name = string("concat_342_interleave_0"), val = bool(false)]; int32 gather_311_cast_uint16_to_int32 = cast(dtype = gather_311_cast_uint16_to_int32_dtype_0, x = gather_311_cast_uint16)[name = string("cast_601")]; tensor concat_342 = concat(axis = concat_342_axis_0, interleave = concat_342_interleave_0, values = (gather_310, gather_311_cast_uint16_to_int32, var_72))[name = string("concat_342")]; tensor var_3866_cast_fp16 = transpose(perm = var_3866_perm_0, x = attn_output_69_cast_fp16)[name = string("transpose_40")]; tensor var_3869_cast_fp16 = reshape(shape = concat_342, x = var_3866_cast_fp16)[name = string("op_3869_cast_fp16")]; tensor model_model_layers_17_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_17_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(854433344)))]; tensor linear_122_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_17_self_attn_o_proj_weight_to_fp16, x = var_3869_cast_fp16)[name = string("linear_122_cast_fp16")]; tensor hidden_states_817_cast_fp16 = add(x = hidden_states_781_cast_fp16, y = linear_122_cast_fp16)[name = string("hidden_states_817_cast_fp16")]; fp16 var_78_promoted_71_to_fp16 = const()[name = string("op_78_promoted_71_to_fp16"), val = fp16(0x1p+1)]; tensor var_3876_cast_fp16 = pow(x = hidden_states_817_cast_fp16, y = var_78_promoted_71_to_fp16)[name = string("op_3876_cast_fp16")]; tensor variance_143_axes_0 = const()[name = string("variance_143_axes_0"), val = tensor([-1])]; bool variance_143_keep_dims_0 = const()[name = string("variance_143_keep_dims_0"), val = bool(true)]; tensor variance_143_cast_fp16 = reduce_mean(axes = variance_143_axes_0, keep_dims = variance_143_keep_dims_0, x = var_3876_cast_fp16)[name = string("variance_143_cast_fp16")]; fp16 var_3879_to_fp16 = const()[name = string("op_3879_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3880_cast_fp16 = add(x = variance_143_cast_fp16, y = var_3879_to_fp16)[name = string("op_3880_cast_fp16")]; fp32 var_3881_epsilon_0 = const()[name = string("op_3881_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3881_cast_fp16 = rsqrt(epsilon = var_3881_epsilon_0, x = var_3880_cast_fp16)[name = string("op_3881_cast_fp16")]; tensor hidden_states_821_cast_fp16 = mul(x = hidden_states_817_cast_fp16, y = var_3881_cast_fp16)[name = string("hidden_states_821_cast_fp16")]; tensor model_model_layers_17_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_17_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(858627712)))]; tensor input_139_cast_fp16 = mul(x = model_model_layers_17_post_attention_layernorm_weight_to_fp16, y = hidden_states_821_cast_fp16)[name = string("input_139_cast_fp16")]; tensor model_model_layers_17_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_17_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(858629824)))]; tensor linear_123_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_17_mlp_gate_proj_weight_to_fp16, x = input_139_cast_fp16)[name = string("linear_123_cast_fp16")]; tensor var_3893_cast_fp16 = silu(x = linear_123_cast_fp16)[name = string("op_3893_cast_fp16")]; tensor model_model_layers_17_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_17_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(864921344)))]; tensor linear_124_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_17_mlp_up_proj_weight_to_fp16, x = input_139_cast_fp16)[name = string("linear_124_cast_fp16")]; tensor input_143_cast_fp16 = mul(x = var_3893_cast_fp16, y = linear_124_cast_fp16)[name = string("input_143_cast_fp16")]; tensor model_model_layers_17_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_17_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(871212864)))]; tensor linear_125_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_17_mlp_down_proj_weight_to_fp16, x = input_143_cast_fp16)[name = string("linear_125_cast_fp16")]; tensor hidden_states_827_cast_fp16 = add(x = hidden_states_817_cast_fp16, y = linear_125_cast_fp16)[name = string("hidden_states_827_cast_fp16")]; fp16 var_78_promoted_72_to_fp16 = const()[name = string("op_78_promoted_72_to_fp16"), val = fp16(0x1p+1)]; tensor var_3906_cast_fp16 = pow(x = hidden_states_827_cast_fp16, y = var_78_promoted_72_to_fp16)[name = string("op_3906_cast_fp16")]; tensor variance_145_axes_0 = const()[name = string("variance_145_axes_0"), val = tensor([-1])]; bool variance_145_keep_dims_0 = const()[name = string("variance_145_keep_dims_0"), val = bool(true)]; tensor variance_145_cast_fp16 = reduce_mean(axes = variance_145_axes_0, keep_dims = variance_145_keep_dims_0, x = var_3906_cast_fp16)[name = string("variance_145_cast_fp16")]; fp16 var_3909_to_fp16 = const()[name = string("op_3909_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3910_cast_fp16 = add(x = variance_145_cast_fp16, y = var_3909_to_fp16)[name = string("op_3910_cast_fp16")]; fp32 var_3911_epsilon_0 = const()[name = string("op_3911_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3911_cast_fp16 = rsqrt(epsilon = var_3911_epsilon_0, x = var_3910_cast_fp16)[name = string("op_3911_cast_fp16")]; tensor hidden_states_831_cast_fp16 = mul(x = hidden_states_827_cast_fp16, y = var_3911_cast_fp16)[name = string("hidden_states_831_cast_fp16")]; tensor model_model_layers_18_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(877504384)))]; tensor hidden_states_835_cast_fp16 = mul(x = model_model_layers_18_input_layernorm_weight_to_fp16, y = hidden_states_831_cast_fp16)[name = string("hidden_states_835_cast_fp16")]; tensor var_3924_shape_cast_fp16 = shape(x = hidden_states_835_cast_fp16)[name = string("op_3924_shape_cast_fp16")]; int32 gather_328 = const()[name = string("gather_328"), val = int32(1)]; int32 gather_329_axis_0 = const()[name = string("gather_329_axis_0"), val = int32(0)]; int32 gather_329_batch_dims_0 = const()[name = string("gather_329_batch_dims_0"), val = int32(0)]; bool gather_329_validate_indices_0 = const()[name = string("gather_329_validate_indices_0"), val = bool(false)]; string var_3924_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_3924_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_329_to_uint16 = const()[name = string("select_329_to_uint16"), val = uint16(1)]; tensor var_3924_shape_cast_fp16_to_uint16 = cast(dtype = var_3924_shape_cast_fp16_to_uint16_dtype_0, x = var_3924_shape_cast_fp16)[name = string("cast_594")]; uint16 gather_329_cast_uint16 = gather(axis = gather_329_axis_0, batch_dims = gather_329_batch_dims_0, indices = select_329_to_uint16, validate_indices = gather_329_validate_indices_0, x = var_3924_shape_cast_fp16_to_uint16)[name = string("gather_329_cast_uint16")]; string gather_329_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_329_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_18_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_18_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(877506496)))]; tensor linear_126_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_18_self_attn_q_proj_weight_to_fp16, x = hidden_states_835_cast_fp16)[name = string("linear_126_cast_fp16")]; tensor concat_343x = const()[name = string("concat_343x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_837_cast_fp16 = reshape(shape = concat_343x, x = linear_126_cast_fp16)[name = string("hidden_states_837_cast_fp16")]; fp16 var_78_promoted_73_to_fp16 = const()[name = string("op_78_promoted_73_to_fp16"), val = fp16(0x1p+1)]; tensor var_3932_cast_fp16 = pow(x = hidden_states_837_cast_fp16, y = var_78_promoted_73_to_fp16)[name = string("op_3932_cast_fp16")]; tensor variance_147_axes_0 = const()[name = string("variance_147_axes_0"), val = tensor([-1])]; bool variance_147_keep_dims_0 = const()[name = string("variance_147_keep_dims_0"), val = bool(true)]; tensor variance_147_cast_fp16 = reduce_mean(axes = variance_147_axes_0, keep_dims = variance_147_keep_dims_0, x = var_3932_cast_fp16)[name = string("variance_147_cast_fp16")]; fp16 var_3935_to_fp16 = const()[name = string("op_3935_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3936_cast_fp16 = add(x = variance_147_cast_fp16, y = var_3935_to_fp16)[name = string("op_3936_cast_fp16")]; fp32 var_3937_epsilon_0 = const()[name = string("op_3937_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3937_cast_fp16 = rsqrt(epsilon = var_3937_epsilon_0, x = var_3936_cast_fp16)[name = string("op_3937_cast_fp16")]; tensor hidden_states_841_cast_fp16 = mul(x = hidden_states_837_cast_fp16, y = var_3937_cast_fp16)[name = string("hidden_states_841_cast_fp16")]; tensor model_model_layers_18_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_18_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881700864)))]; tensor var_3940_cast_fp16 = mul(x = model_model_layers_18_self_attn_q_norm_weight_to_fp16, y = hidden_states_841_cast_fp16)[name = string("op_3940_cast_fp16")]; tensor q_37_perm_0 = const()[name = string("q_37_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_18_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_18_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(881701184)))]; tensor linear_127_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_18_self_attn_k_proj_weight_to_fp16, x = hidden_states_835_cast_fp16)[name = string("linear_127_cast_fp16")]; tensor concat_344x = const()[name = string("concat_344x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_845_cast_fp16 = reshape(shape = concat_344x, x = linear_127_cast_fp16)[name = string("hidden_states_845_cast_fp16")]; fp16 var_78_promoted_74_to_fp16 = const()[name = string("op_78_promoted_74_to_fp16"), val = fp16(0x1p+1)]; tensor var_3948_cast_fp16 = pow(x = hidden_states_845_cast_fp16, y = var_78_promoted_74_to_fp16)[name = string("op_3948_cast_fp16")]; tensor variance_149_axes_0 = const()[name = string("variance_149_axes_0"), val = tensor([-1])]; bool variance_149_keep_dims_0 = const()[name = string("variance_149_keep_dims_0"), val = bool(true)]; tensor variance_149_cast_fp16 = reduce_mean(axes = variance_149_axes_0, keep_dims = variance_149_keep_dims_0, x = var_3948_cast_fp16)[name = string("variance_149_cast_fp16")]; fp16 var_3951_to_fp16 = const()[name = string("op_3951_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_3952_cast_fp16 = add(x = variance_149_cast_fp16, y = var_3951_to_fp16)[name = string("op_3952_cast_fp16")]; fp32 var_3953_epsilon_0 = const()[name = string("op_3953_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_3953_cast_fp16 = rsqrt(epsilon = var_3953_epsilon_0, x = var_3952_cast_fp16)[name = string("op_3953_cast_fp16")]; tensor hidden_states_849_cast_fp16 = mul(x = hidden_states_845_cast_fp16, y = var_3953_cast_fp16)[name = string("hidden_states_849_cast_fp16")]; tensor model_model_layers_18_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_18_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(883798400)))]; tensor var_3956_cast_fp16 = mul(x = model_model_layers_18_self_attn_k_norm_weight_to_fp16, y = hidden_states_849_cast_fp16)[name = string("op_3956_cast_fp16")]; tensor k_37_perm_0 = const()[name = string("k_37_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_18_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_18_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(883798720)))]; tensor linear_128_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_18_self_attn_v_proj_weight_to_fp16, x = hidden_states_835_cast_fp16)[name = string("linear_128_cast_fp16")]; tensor concat_345x = const()[name = string("concat_345x"), val = tensor([1, -1, 8, 128])]; tensor var_3961_cast_fp16 = reshape(shape = concat_345x, x = linear_128_cast_fp16)[name = string("op_3961_cast_fp16")]; tensor v_state_37_perm_0 = const()[name = string("v_state_37_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_37_cast_fp16 = transpose(perm = q_37_perm_0, x = var_3940_cast_fp16)[name = string("transpose_39")]; tensor var_3965_cast_fp16 = mul(x = q_37_cast_fp16, y = cos_5_cast_fp16)[name = string("op_3965_cast_fp16")]; tensor x1_73_begin_0 = const()[name = string("x1_73_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_73_end_0 = const()[name = string("x1_73_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_73_end_mask_0 = const()[name = string("x1_73_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_73_cast_fp16 = slice_by_index(begin = x1_73_begin_0, end = x1_73_end_0, end_mask = x1_73_end_mask_0, x = q_37_cast_fp16)[name = string("x1_73_cast_fp16")]; tensor x2_73_begin_0 = const()[name = string("x2_73_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_73_end_0 = const()[name = string("x2_73_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_73_end_mask_0 = const()[name = string("x2_73_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_73_cast_fp16 = slice_by_index(begin = x2_73_begin_0, end = x2_73_end_0, end_mask = x2_73_end_mask_0, x = q_37_cast_fp16)[name = string("x2_73_cast_fp16")]; fp16 const_40_promoted_to_fp16 = const()[name = string("const_40_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3976_cast_fp16 = mul(x = x2_73_cast_fp16, y = const_40_promoted_to_fp16)[name = string("op_3976_cast_fp16")]; bool var_3978_interleave_0 = const()[name = string("op_3978_interleave_0"), val = bool(false)]; tensor var_3978_cast_fp16 = concat(axis = var_72, interleave = var_3978_interleave_0, values = (var_3976_cast_fp16, x1_73_cast_fp16))[name = string("op_3978_cast_fp16")]; tensor var_3979_cast_fp16 = mul(x = var_3978_cast_fp16, y = sin_5_cast_fp16)[name = string("op_3979_cast_fp16")]; tensor query_73_cast_fp16 = add(x = var_3965_cast_fp16, y = var_3979_cast_fp16)[name = string("query_73_cast_fp16")]; tensor k_37_cast_fp16 = transpose(perm = k_37_perm_0, x = var_3956_cast_fp16)[name = string("transpose_38")]; tensor var_3981_cast_fp16 = mul(x = k_37_cast_fp16, y = cos_5_cast_fp16)[name = string("op_3981_cast_fp16")]; tensor x1_75_begin_0 = const()[name = string("x1_75_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_75_end_0 = const()[name = string("x1_75_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_75_end_mask_0 = const()[name = string("x1_75_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_75_cast_fp16 = slice_by_index(begin = x1_75_begin_0, end = x1_75_end_0, end_mask = x1_75_end_mask_0, x = k_37_cast_fp16)[name = string("x1_75_cast_fp16")]; tensor x2_75_begin_0 = const()[name = string("x2_75_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_75_end_0 = const()[name = string("x2_75_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_75_end_mask_0 = const()[name = string("x2_75_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_75_cast_fp16 = slice_by_index(begin = x2_75_begin_0, end = x2_75_end_0, end_mask = x2_75_end_mask_0, x = k_37_cast_fp16)[name = string("x2_75_cast_fp16")]; fp16 const_41_promoted_to_fp16 = const()[name = string("const_41_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_3992_cast_fp16 = mul(x = x2_75_cast_fp16, y = const_41_promoted_to_fp16)[name = string("op_3992_cast_fp16")]; bool var_3994_interleave_0 = const()[name = string("op_3994_interleave_0"), val = bool(false)]; tensor var_3994_cast_fp16 = concat(axis = var_72, interleave = var_3994_interleave_0, values = (var_3992_cast_fp16, x1_75_cast_fp16))[name = string("op_3994_cast_fp16")]; tensor var_3995_cast_fp16 = mul(x = var_3994_cast_fp16, y = sin_5_cast_fp16)[name = string("op_3995_cast_fp16")]; tensor k_state_37_cast_fp16 = add(x = var_3981_cast_fp16, y = var_3995_cast_fp16)[name = string("k_state_37_cast_fp16")]; tensor expand_dims_216 = const()[name = string("expand_dims_216"), val = tensor([0])]; tensor expand_dims_217 = const()[name = string("expand_dims_217"), val = tensor([0])]; tensor expand_dims_219 = const()[name = string("expand_dims_219"), val = tensor([0])]; tensor concat_348_values0_0 = const()[name = string("concat_348_values0_0"), val = tensor([18])]; int32 concat_348_axis_0 = const()[name = string("concat_348_axis_0"), val = int32(0)]; bool concat_348_interleave_0 = const()[name = string("concat_348_interleave_0"), val = bool(false)]; tensor concat_348 = concat(axis = concat_348_axis_0, interleave = concat_348_interleave_0, values = (concat_348_values0_0, expand_dims_216, expand_dims_217, expand_dims_2, expand_dims_219))[name = string("concat_348")]; tensor key_cache_internal_tensor_assign_19_stride_0 = const()[name = string("key_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_348, begin_mask = key_cache_internal_tensor_assign_19_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_19_squeeze_mask_0, stride = key_cache_internal_tensor_assign_19_stride_0, update = k_state_37_cast_fp16, x = coreml_update_state_90)[name = string("key_cache_internal_tensor_assign_19_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_19_cast_fp16, input = key_cache)[name = string("coreml_update_state_92_write_state")]; tensor coreml_update_state_92 = read_state(input = key_cache)[name = string("coreml_update_state_92")]; tensor value_cache_internal_tensor_assign_19_stride_0 = const()[name = string("value_cache_internal_tensor_assign_19_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_19_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_19_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_19_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_19_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_37_cast_fp16 = transpose(perm = v_state_37_perm_0, x = var_3961_cast_fp16)[name = string("transpose_37")]; tensor value_cache_internal_tensor_assign_19_cast_fp16 = slice_update(begin = concat_348, begin_mask = value_cache_internal_tensor_assign_19_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_19_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_19_squeeze_mask_0, stride = value_cache_internal_tensor_assign_19_stride_0, update = v_state_37_cast_fp16, x = coreml_update_state_91)[name = string("value_cache_internal_tensor_assign_19_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_19_cast_fp16, input = value_cache)[name = string("coreml_update_state_93_write_state")]; tensor coreml_update_state_93 = read_state(input = value_cache)[name = string("coreml_update_state_93")]; tensor var_4018_begin_0 = const()[name = string("op_4018_begin_0"), val = tensor([18, 0, 0, 0, 0])]; tensor var_4018_end_0 = const()[name = string("op_4018_end_0"), val = tensor([19, 1, 8, 2048, 128])]; tensor var_4018_end_mask_0 = const()[name = string("op_4018_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4018_squeeze_mask_0 = const()[name = string("op_4018_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_4018_cast_fp16 = slice_by_index(begin = var_4018_begin_0, end = var_4018_end_0, end_mask = var_4018_end_mask_0, squeeze_mask = var_4018_squeeze_mask_0, x = coreml_update_state_92)[name = string("op_4018_cast_fp16")]; tensor var_4021_begin_0 = const()[name = string("op_4021_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4021_end_mask_0 = const()[name = string("op_4021_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4021_cast_fp16 = slice_by_index(begin = var_4021_begin_0, end = concat_12, end_mask = var_4021_end_mask_0, x = var_4018_cast_fp16)[name = string("op_4021_cast_fp16")]; tensor var_4023_begin_0 = const()[name = string("op_4023_begin_0"), val = tensor([18, 0, 0, 0, 0])]; tensor var_4023_end_0 = const()[name = string("op_4023_end_0"), val = tensor([19, 1, 8, 2048, 128])]; tensor var_4023_end_mask_0 = const()[name = string("op_4023_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4023_squeeze_mask_0 = const()[name = string("op_4023_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_4023_cast_fp16 = slice_by_index(begin = var_4023_begin_0, end = var_4023_end_0, end_mask = var_4023_end_mask_0, squeeze_mask = var_4023_squeeze_mask_0, x = coreml_update_state_93)[name = string("op_4023_cast_fp16")]; tensor var_4026_begin_0 = const()[name = string("op_4026_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4026_end_mask_0 = const()[name = string("op_4026_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4026_cast_fp16 = slice_by_index(begin = var_4026_begin_0, end = concat_12, end_mask = var_4026_end_mask_0, x = var_4023_cast_fp16)[name = string("op_4026_cast_fp16")]; tensor var_4028_shape_cast_fp16 = shape(x = var_4021_cast_fp16)[name = string("op_4028_shape_cast_fp16")]; int32 gather_337 = const()[name = string("gather_337"), val = int32(1)]; int32 gather_338 = const()[name = string("gather_338"), val = int32(8)]; int32 gather_339_axis_0 = const()[name = string("gather_339_axis_0"), val = int32(0)]; int32 gather_339_batch_dims_0 = const()[name = string("gather_339_batch_dims_0"), val = int32(0)]; bool gather_339_validate_indices_0 = const()[name = string("gather_339_validate_indices_0"), val = bool(false)]; string var_4028_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4028_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_339_to_uint16 = const()[name = string("select_339_to_uint16"), val = uint16(2)]; tensor var_4028_shape_cast_fp16_to_uint16 = cast(dtype = var_4028_shape_cast_fp16_to_uint16_dtype_0, x = var_4028_shape_cast_fp16)[name = string("cast_592")]; uint16 gather_339_cast_uint16 = gather(axis = gather_339_axis_0, batch_dims = gather_339_batch_dims_0, indices = select_339_to_uint16, validate_indices = gather_339_validate_indices_0, x = var_4028_shape_cast_fp16_to_uint16)[name = string("gather_339_cast_uint16")]; string gather_339_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_339_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_340 = const()[name = string("gather_340"), val = int32(128)]; tensor var_4035_axes_0 = const()[name = string("op_4035_axes_0"), val = tensor([2])]; tensor var_4035_cast_fp16 = expand_dims(axes = var_4035_axes_0, x = var_4021_cast_fp16)[name = string("op_4035_cast_fp16")]; int32 concat_356_axis_0 = const()[name = string("concat_356_axis_0"), val = int32(0)]; bool concat_356_interleave_0 = const()[name = string("concat_356_interleave_0"), val = bool(false)]; int32 gather_339_cast_uint16_to_int32 = cast(dtype = gather_339_cast_uint16_to_int32_dtype_0, x = gather_339_cast_uint16)[name = string("cast_591")]; tensor concat_356 = concat(axis = concat_356_axis_0, interleave = concat_356_interleave_0, values = (gather_337, gather_338, var_78, gather_339_cast_uint16_to_int32, gather_340))[name = string("concat_356")]; tensor shape_377_cast_fp16 = shape(x = var_4035_cast_fp16)[name = string("shape_377_cast_fp16")]; tensor real_div_36 = real_div(x = concat_356, y = shape_377_cast_fp16)[name = string("real_div_36")]; tensor hidden_states_855_cast_fp16 = tile(reps = real_div_36, x = var_4035_cast_fp16)[name = string("hidden_states_855_cast_fp16")]; tensor concat_357x = const()[name = string("concat_357x"), val = tensor([1, 16, -1, 128])]; tensor key_73_cast_fp16 = reshape(shape = concat_357x, x = hidden_states_855_cast_fp16)[name = string("key_73_cast_fp16")]; tensor var_4045_shape_cast_fp16 = shape(x = var_4026_cast_fp16)[name = string("op_4045_shape_cast_fp16")]; int32 gather_341 = const()[name = string("gather_341"), val = int32(1)]; int32 gather_342 = const()[name = string("gather_342"), val = int32(8)]; int32 gather_343_axis_0 = const()[name = string("gather_343_axis_0"), val = int32(0)]; int32 gather_343_batch_dims_0 = const()[name = string("gather_343_batch_dims_0"), val = int32(0)]; bool gather_343_validate_indices_0 = const()[name = string("gather_343_validate_indices_0"), val = bool(false)]; string var_4045_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4045_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_343_to_uint16 = const()[name = string("select_343_to_uint16"), val = uint16(2)]; tensor var_4045_shape_cast_fp16_to_uint16 = cast(dtype = var_4045_shape_cast_fp16_to_uint16_dtype_0, x = var_4045_shape_cast_fp16)[name = string("cast_590")]; uint16 gather_343_cast_uint16 = gather(axis = gather_343_axis_0, batch_dims = gather_343_batch_dims_0, indices = select_343_to_uint16, validate_indices = gather_343_validate_indices_0, x = var_4045_shape_cast_fp16_to_uint16)[name = string("gather_343_cast_uint16")]; string gather_343_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_343_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_344 = const()[name = string("gather_344"), val = int32(128)]; tensor var_4052_axes_0 = const()[name = string("op_4052_axes_0"), val = tensor([2])]; tensor var_4052_cast_fp16 = expand_dims(axes = var_4052_axes_0, x = var_4026_cast_fp16)[name = string("op_4052_cast_fp16")]; int32 concat_358_axis_0 = const()[name = string("concat_358_axis_0"), val = int32(0)]; bool concat_358_interleave_0 = const()[name = string("concat_358_interleave_0"), val = bool(false)]; int32 gather_343_cast_uint16_to_int32 = cast(dtype = gather_343_cast_uint16_to_int32_dtype_0, x = gather_343_cast_uint16)[name = string("cast_589")]; tensor concat_358 = concat(axis = concat_358_axis_0, interleave = concat_358_interleave_0, values = (gather_341, gather_342, var_78, gather_343_cast_uint16_to_int32, gather_344))[name = string("concat_358")]; tensor shape_382_cast_fp16 = shape(x = var_4052_cast_fp16)[name = string("shape_382_cast_fp16")]; tensor real_div_37 = real_div(x = concat_358, y = shape_382_cast_fp16)[name = string("real_div_37")]; tensor hidden_states_859_cast_fp16 = tile(reps = real_div_37, x = var_4052_cast_fp16)[name = string("hidden_states_859_cast_fp16")]; tensor concat_359x = const()[name = string("concat_359x"), val = tensor([1, 16, -1, 128])]; tensor value_73_cast_fp16 = reshape(shape = concat_359x, x = hidden_states_859_cast_fp16)[name = string("value_73_cast_fp16")]; tensor var_4062_shape_cast_fp16 = shape(x = key_73_cast_fp16)[name = string("op_4062_shape_cast_fp16")]; int32 gather_345_axis_0 = const()[name = string("gather_345_axis_0"), val = int32(0)]; int32 gather_345_batch_dims_0 = const()[name = string("gather_345_batch_dims_0"), val = int32(0)]; bool gather_345_validate_indices_0 = const()[name = string("gather_345_validate_indices_0"), val = bool(false)]; string var_4062_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4062_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_345_to_uint16 = const()[name = string("select_345_to_uint16"), val = uint16(2)]; tensor var_4062_shape_cast_fp16_to_uint16 = cast(dtype = var_4062_shape_cast_fp16_to_uint16_dtype_0, x = var_4062_shape_cast_fp16)[name = string("cast_588")]; uint16 gather_345_cast_uint16 = gather(axis = gather_345_axis_0, batch_dims = gather_345_batch_dims_0, indices = select_345_to_uint16, validate_indices = gather_345_validate_indices_0, x = var_4062_shape_cast_fp16_to_uint16)[name = string("gather_345_cast_uint16")]; string gather_345_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_345_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_360_values0_0 = const()[name = string("concat_360_values0_0"), val = int32(1)]; int32 concat_360_values1_0 = const()[name = string("concat_360_values1_0"), val = int32(1)]; int32 concat_360_values2_0 = const()[name = string("concat_360_values2_0"), val = int32(0)]; int32 concat_360_axis_0 = const()[name = string("concat_360_axis_0"), val = int32(0)]; bool concat_360_interleave_0 = const()[name = string("concat_360_interleave_0"), val = bool(false)]; int32 gather_345_cast_uint16_to_int32 = cast(dtype = gather_345_cast_uint16_to_int32_dtype_0, x = gather_345_cast_uint16)[name = string("cast_587")]; tensor concat_360 = concat(axis = concat_360_axis_0, interleave = concat_360_interleave_0, values = (concat_360_values0_0, concat_360_values1_0, concat_360_values2_0, gather_345_cast_uint16_to_int32))[name = string("concat_360")]; tensor attention_mask_37_begin_0 = const()[name = string("attention_mask_37_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_37_end_mask_0 = const()[name = string("attention_mask_37_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_37_cast_fp16 = slice_by_index(begin = attention_mask_37_begin_0, end = concat_360, end_mask = attention_mask_37_end_mask_0, x = causal_mask)[name = string("attention_mask_37_cast_fp16")]; tensor mul_18_cast_fp16 = mul(x = query_73_cast_fp16, y = var_85_to_fp16)[name = string("mul_18_cast_fp16")]; bool matmul_18_transpose_y_0 = const()[name = string("matmul_18_transpose_y_0"), val = bool(true)]; bool matmul_18_transpose_x_0 = const()[name = string("matmul_18_transpose_x_0"), val = bool(false)]; tensor matmul_18_cast_fp16 = matmul(transpose_x = matmul_18_transpose_x_0, transpose_y = matmul_18_transpose_y_0, x = mul_18_cast_fp16, y = key_73_cast_fp16)[name = string("matmul_18_cast_fp16")]; tensor add_364_cast_fp16 = add(x = matmul_18_cast_fp16, y = attention_mask_37_cast_fp16)[name = string("add_364_cast_fp16")]; int32 softmax_18_axis_0 = const()[name = string("softmax_18_axis_0"), val = int32(-1)]; tensor softmax_18_cast_fp16 = softmax(axis = softmax_18_axis_0, x = add_364_cast_fp16)[name = string("softmax_18_cast_fp16")]; bool attn_output_73_transpose_x_0 = const()[name = string("attn_output_73_transpose_x_0"), val = bool(false)]; bool attn_output_73_transpose_y_0 = const()[name = string("attn_output_73_transpose_y_0"), val = bool(false)]; tensor attn_output_73_cast_fp16 = matmul(transpose_x = attn_output_73_transpose_x_0, transpose_y = attn_output_73_transpose_y_0, x = softmax_18_cast_fp16, y = value_73_cast_fp16)[name = string("attn_output_73_cast_fp16")]; tensor var_4071_perm_0 = const()[name = string("op_4071_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_361_axis_0 = const()[name = string("concat_361_axis_0"), val = int32(0)]; bool concat_361_interleave_0 = const()[name = string("concat_361_interleave_0"), val = bool(false)]; int32 gather_329_cast_uint16_to_int32 = cast(dtype = gather_329_cast_uint16_to_int32_dtype_0, x = gather_329_cast_uint16)[name = string("cast_593")]; tensor concat_361 = concat(axis = concat_361_axis_0, interleave = concat_361_interleave_0, values = (gather_328, gather_329_cast_uint16_to_int32, var_72))[name = string("concat_361")]; tensor var_4071_cast_fp16 = transpose(perm = var_4071_perm_0, x = attn_output_73_cast_fp16)[name = string("transpose_36")]; tensor var_4074_cast_fp16 = reshape(shape = concat_361, x = var_4071_cast_fp16)[name = string("op_4074_cast_fp16")]; tensor model_model_layers_18_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_18_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(885895936)))]; tensor linear_129_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_18_self_attn_o_proj_weight_to_fp16, x = var_4074_cast_fp16)[name = string("linear_129_cast_fp16")]; tensor hidden_states_863_cast_fp16 = add(x = hidden_states_827_cast_fp16, y = linear_129_cast_fp16)[name = string("hidden_states_863_cast_fp16")]; fp16 var_78_promoted_75_to_fp16 = const()[name = string("op_78_promoted_75_to_fp16"), val = fp16(0x1p+1)]; tensor var_4081_cast_fp16 = pow(x = hidden_states_863_cast_fp16, y = var_78_promoted_75_to_fp16)[name = string("op_4081_cast_fp16")]; tensor variance_151_axes_0 = const()[name = string("variance_151_axes_0"), val = tensor([-1])]; bool variance_151_keep_dims_0 = const()[name = string("variance_151_keep_dims_0"), val = bool(true)]; tensor variance_151_cast_fp16 = reduce_mean(axes = variance_151_axes_0, keep_dims = variance_151_keep_dims_0, x = var_4081_cast_fp16)[name = string("variance_151_cast_fp16")]; fp16 var_4084_to_fp16 = const()[name = string("op_4084_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4085_cast_fp16 = add(x = variance_151_cast_fp16, y = var_4084_to_fp16)[name = string("op_4085_cast_fp16")]; fp32 var_4086_epsilon_0 = const()[name = string("op_4086_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4086_cast_fp16 = rsqrt(epsilon = var_4086_epsilon_0, x = var_4085_cast_fp16)[name = string("op_4086_cast_fp16")]; tensor hidden_states_867_cast_fp16 = mul(x = hidden_states_863_cast_fp16, y = var_4086_cast_fp16)[name = string("hidden_states_867_cast_fp16")]; tensor model_model_layers_18_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_18_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(890090304)))]; tensor input_147_cast_fp16 = mul(x = model_model_layers_18_post_attention_layernorm_weight_to_fp16, y = hidden_states_867_cast_fp16)[name = string("input_147_cast_fp16")]; tensor model_model_layers_18_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_18_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(890092416)))]; tensor linear_130_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_18_mlp_gate_proj_weight_to_fp16, x = input_147_cast_fp16)[name = string("linear_130_cast_fp16")]; tensor var_4098_cast_fp16 = silu(x = linear_130_cast_fp16)[name = string("op_4098_cast_fp16")]; tensor model_model_layers_18_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_18_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(896383936)))]; tensor linear_131_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_18_mlp_up_proj_weight_to_fp16, x = input_147_cast_fp16)[name = string("linear_131_cast_fp16")]; tensor input_151_cast_fp16 = mul(x = var_4098_cast_fp16, y = linear_131_cast_fp16)[name = string("input_151_cast_fp16")]; tensor model_model_layers_18_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_18_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(902675456)))]; tensor linear_132_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_18_mlp_down_proj_weight_to_fp16, x = input_151_cast_fp16)[name = string("linear_132_cast_fp16")]; tensor hidden_states_873_cast_fp16 = add(x = hidden_states_863_cast_fp16, y = linear_132_cast_fp16)[name = string("hidden_states_873_cast_fp16")]; fp16 var_78_promoted_76_to_fp16 = const()[name = string("op_78_promoted_76_to_fp16"), val = fp16(0x1p+1)]; tensor var_4111_cast_fp16 = pow(x = hidden_states_873_cast_fp16, y = var_78_promoted_76_to_fp16)[name = string("op_4111_cast_fp16")]; tensor variance_153_axes_0 = const()[name = string("variance_153_axes_0"), val = tensor([-1])]; bool variance_153_keep_dims_0 = const()[name = string("variance_153_keep_dims_0"), val = bool(true)]; tensor variance_153_cast_fp16 = reduce_mean(axes = variance_153_axes_0, keep_dims = variance_153_keep_dims_0, x = var_4111_cast_fp16)[name = string("variance_153_cast_fp16")]; fp16 var_4114_to_fp16 = const()[name = string("op_4114_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4115_cast_fp16 = add(x = variance_153_cast_fp16, y = var_4114_to_fp16)[name = string("op_4115_cast_fp16")]; fp32 var_4116_epsilon_0 = const()[name = string("op_4116_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4116_cast_fp16 = rsqrt(epsilon = var_4116_epsilon_0, x = var_4115_cast_fp16)[name = string("op_4116_cast_fp16")]; tensor hidden_states_877_cast_fp16 = mul(x = hidden_states_873_cast_fp16, y = var_4116_cast_fp16)[name = string("hidden_states_877_cast_fp16")]; tensor model_model_layers_19_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(908966976)))]; tensor hidden_states_881_cast_fp16 = mul(x = model_model_layers_19_input_layernorm_weight_to_fp16, y = hidden_states_877_cast_fp16)[name = string("hidden_states_881_cast_fp16")]; tensor var_4129_shape_cast_fp16 = shape(x = hidden_states_881_cast_fp16)[name = string("op_4129_shape_cast_fp16")]; int32 gather_346 = const()[name = string("gather_346"), val = int32(1)]; int32 gather_347_axis_0 = const()[name = string("gather_347_axis_0"), val = int32(0)]; int32 gather_347_batch_dims_0 = const()[name = string("gather_347_batch_dims_0"), val = int32(0)]; bool gather_347_validate_indices_0 = const()[name = string("gather_347_validate_indices_0"), val = bool(false)]; string var_4129_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4129_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_347_to_uint16 = const()[name = string("select_347_to_uint16"), val = uint16(1)]; tensor var_4129_shape_cast_fp16_to_uint16 = cast(dtype = var_4129_shape_cast_fp16_to_uint16_dtype_0, x = var_4129_shape_cast_fp16)[name = string("cast_586")]; uint16 gather_347_cast_uint16 = gather(axis = gather_347_axis_0, batch_dims = gather_347_batch_dims_0, indices = select_347_to_uint16, validate_indices = gather_347_validate_indices_0, x = var_4129_shape_cast_fp16_to_uint16)[name = string("gather_347_cast_uint16")]; string gather_347_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_347_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_19_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_19_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(908969088)))]; tensor linear_133_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_19_self_attn_q_proj_weight_to_fp16, x = hidden_states_881_cast_fp16)[name = string("linear_133_cast_fp16")]; tensor concat_362x = const()[name = string("concat_362x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_883_cast_fp16 = reshape(shape = concat_362x, x = linear_133_cast_fp16)[name = string("hidden_states_883_cast_fp16")]; fp16 var_78_promoted_77_to_fp16 = const()[name = string("op_78_promoted_77_to_fp16"), val = fp16(0x1p+1)]; tensor var_4137_cast_fp16 = pow(x = hidden_states_883_cast_fp16, y = var_78_promoted_77_to_fp16)[name = string("op_4137_cast_fp16")]; tensor variance_155_axes_0 = const()[name = string("variance_155_axes_0"), val = tensor([-1])]; bool variance_155_keep_dims_0 = const()[name = string("variance_155_keep_dims_0"), val = bool(true)]; tensor variance_155_cast_fp16 = reduce_mean(axes = variance_155_axes_0, keep_dims = variance_155_keep_dims_0, x = var_4137_cast_fp16)[name = string("variance_155_cast_fp16")]; fp16 var_4140_to_fp16 = const()[name = string("op_4140_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4141_cast_fp16 = add(x = variance_155_cast_fp16, y = var_4140_to_fp16)[name = string("op_4141_cast_fp16")]; fp32 var_4142_epsilon_0 = const()[name = string("op_4142_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4142_cast_fp16 = rsqrt(epsilon = var_4142_epsilon_0, x = var_4141_cast_fp16)[name = string("op_4142_cast_fp16")]; tensor hidden_states_887_cast_fp16 = mul(x = hidden_states_883_cast_fp16, y = var_4142_cast_fp16)[name = string("hidden_states_887_cast_fp16")]; tensor model_model_layers_19_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_19_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(913163456)))]; tensor var_4145_cast_fp16 = mul(x = model_model_layers_19_self_attn_q_norm_weight_to_fp16, y = hidden_states_887_cast_fp16)[name = string("op_4145_cast_fp16")]; tensor q_39_perm_0 = const()[name = string("q_39_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_19_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_19_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(913163776)))]; tensor linear_134_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_19_self_attn_k_proj_weight_to_fp16, x = hidden_states_881_cast_fp16)[name = string("linear_134_cast_fp16")]; tensor concat_363x = const()[name = string("concat_363x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_891_cast_fp16 = reshape(shape = concat_363x, x = linear_134_cast_fp16)[name = string("hidden_states_891_cast_fp16")]; fp16 var_78_promoted_78_to_fp16 = const()[name = string("op_78_promoted_78_to_fp16"), val = fp16(0x1p+1)]; tensor var_4153_cast_fp16 = pow(x = hidden_states_891_cast_fp16, y = var_78_promoted_78_to_fp16)[name = string("op_4153_cast_fp16")]; tensor variance_157_axes_0 = const()[name = string("variance_157_axes_0"), val = tensor([-1])]; bool variance_157_keep_dims_0 = const()[name = string("variance_157_keep_dims_0"), val = bool(true)]; tensor variance_157_cast_fp16 = reduce_mean(axes = variance_157_axes_0, keep_dims = variance_157_keep_dims_0, x = var_4153_cast_fp16)[name = string("variance_157_cast_fp16")]; fp16 var_4156_to_fp16 = const()[name = string("op_4156_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4157_cast_fp16 = add(x = variance_157_cast_fp16, y = var_4156_to_fp16)[name = string("op_4157_cast_fp16")]; fp32 var_4158_epsilon_0 = const()[name = string("op_4158_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4158_cast_fp16 = rsqrt(epsilon = var_4158_epsilon_0, x = var_4157_cast_fp16)[name = string("op_4158_cast_fp16")]; tensor hidden_states_895_cast_fp16 = mul(x = hidden_states_891_cast_fp16, y = var_4158_cast_fp16)[name = string("hidden_states_895_cast_fp16")]; tensor model_model_layers_19_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_19_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(915260992)))]; tensor var_4161_cast_fp16 = mul(x = model_model_layers_19_self_attn_k_norm_weight_to_fp16, y = hidden_states_895_cast_fp16)[name = string("op_4161_cast_fp16")]; tensor k_39_perm_0 = const()[name = string("k_39_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_19_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_19_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(915261312)))]; tensor linear_135_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_19_self_attn_v_proj_weight_to_fp16, x = hidden_states_881_cast_fp16)[name = string("linear_135_cast_fp16")]; tensor concat_364x = const()[name = string("concat_364x"), val = tensor([1, -1, 8, 128])]; tensor var_4166_cast_fp16 = reshape(shape = concat_364x, x = linear_135_cast_fp16)[name = string("op_4166_cast_fp16")]; tensor v_state_39_perm_0 = const()[name = string("v_state_39_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_39_cast_fp16 = transpose(perm = q_39_perm_0, x = var_4145_cast_fp16)[name = string("transpose_35")]; tensor var_4170_cast_fp16 = mul(x = q_39_cast_fp16, y = cos_5_cast_fp16)[name = string("op_4170_cast_fp16")]; tensor x1_77_begin_0 = const()[name = string("x1_77_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_77_end_0 = const()[name = string("x1_77_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_77_end_mask_0 = const()[name = string("x1_77_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_77_cast_fp16 = slice_by_index(begin = x1_77_begin_0, end = x1_77_end_0, end_mask = x1_77_end_mask_0, x = q_39_cast_fp16)[name = string("x1_77_cast_fp16")]; tensor x2_77_begin_0 = const()[name = string("x2_77_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_77_end_0 = const()[name = string("x2_77_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_77_end_mask_0 = const()[name = string("x2_77_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_77_cast_fp16 = slice_by_index(begin = x2_77_begin_0, end = x2_77_end_0, end_mask = x2_77_end_mask_0, x = q_39_cast_fp16)[name = string("x2_77_cast_fp16")]; fp16 const_42_promoted_to_fp16 = const()[name = string("const_42_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4181_cast_fp16 = mul(x = x2_77_cast_fp16, y = const_42_promoted_to_fp16)[name = string("op_4181_cast_fp16")]; bool var_4183_interleave_0 = const()[name = string("op_4183_interleave_0"), val = bool(false)]; tensor var_4183_cast_fp16 = concat(axis = var_72, interleave = var_4183_interleave_0, values = (var_4181_cast_fp16, x1_77_cast_fp16))[name = string("op_4183_cast_fp16")]; tensor var_4184_cast_fp16 = mul(x = var_4183_cast_fp16, y = sin_5_cast_fp16)[name = string("op_4184_cast_fp16")]; tensor query_77_cast_fp16 = add(x = var_4170_cast_fp16, y = var_4184_cast_fp16)[name = string("query_77_cast_fp16")]; tensor k_39_cast_fp16 = transpose(perm = k_39_perm_0, x = var_4161_cast_fp16)[name = string("transpose_34")]; tensor var_4186_cast_fp16 = mul(x = k_39_cast_fp16, y = cos_5_cast_fp16)[name = string("op_4186_cast_fp16")]; tensor x1_79_begin_0 = const()[name = string("x1_79_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_79_end_0 = const()[name = string("x1_79_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_79_end_mask_0 = const()[name = string("x1_79_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_79_cast_fp16 = slice_by_index(begin = x1_79_begin_0, end = x1_79_end_0, end_mask = x1_79_end_mask_0, x = k_39_cast_fp16)[name = string("x1_79_cast_fp16")]; tensor x2_79_begin_0 = const()[name = string("x2_79_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_79_end_0 = const()[name = string("x2_79_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_79_end_mask_0 = const()[name = string("x2_79_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_79_cast_fp16 = slice_by_index(begin = x2_79_begin_0, end = x2_79_end_0, end_mask = x2_79_end_mask_0, x = k_39_cast_fp16)[name = string("x2_79_cast_fp16")]; fp16 const_43_promoted_to_fp16 = const()[name = string("const_43_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4197_cast_fp16 = mul(x = x2_79_cast_fp16, y = const_43_promoted_to_fp16)[name = string("op_4197_cast_fp16")]; bool var_4199_interleave_0 = const()[name = string("op_4199_interleave_0"), val = bool(false)]; tensor var_4199_cast_fp16 = concat(axis = var_72, interleave = var_4199_interleave_0, values = (var_4197_cast_fp16, x1_79_cast_fp16))[name = string("op_4199_cast_fp16")]; tensor var_4200_cast_fp16 = mul(x = var_4199_cast_fp16, y = sin_5_cast_fp16)[name = string("op_4200_cast_fp16")]; tensor k_state_39_cast_fp16 = add(x = var_4186_cast_fp16, y = var_4200_cast_fp16)[name = string("k_state_39_cast_fp16")]; tensor expand_dims_228 = const()[name = string("expand_dims_228"), val = tensor([0])]; tensor expand_dims_229 = const()[name = string("expand_dims_229"), val = tensor([0])]; tensor expand_dims_231 = const()[name = string("expand_dims_231"), val = tensor([0])]; tensor concat_367_values0_0 = const()[name = string("concat_367_values0_0"), val = tensor([19])]; int32 concat_367_axis_0 = const()[name = string("concat_367_axis_0"), val = int32(0)]; bool concat_367_interleave_0 = const()[name = string("concat_367_interleave_0"), val = bool(false)]; tensor concat_367 = concat(axis = concat_367_axis_0, interleave = concat_367_interleave_0, values = (concat_367_values0_0, expand_dims_228, expand_dims_229, expand_dims_2, expand_dims_231))[name = string("concat_367")]; tensor key_cache_internal_tensor_assign_20_stride_0 = const()[name = string("key_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_367, begin_mask = key_cache_internal_tensor_assign_20_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_20_squeeze_mask_0, stride = key_cache_internal_tensor_assign_20_stride_0, update = k_state_39_cast_fp16, x = coreml_update_state_92)[name = string("key_cache_internal_tensor_assign_20_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_20_cast_fp16, input = key_cache)[name = string("coreml_update_state_94_write_state")]; tensor coreml_update_state_94 = read_state(input = key_cache)[name = string("coreml_update_state_94")]; tensor value_cache_internal_tensor_assign_20_stride_0 = const()[name = string("value_cache_internal_tensor_assign_20_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_20_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_20_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_20_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_20_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_39_cast_fp16 = transpose(perm = v_state_39_perm_0, x = var_4166_cast_fp16)[name = string("transpose_33")]; tensor value_cache_internal_tensor_assign_20_cast_fp16 = slice_update(begin = concat_367, begin_mask = value_cache_internal_tensor_assign_20_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_20_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_20_squeeze_mask_0, stride = value_cache_internal_tensor_assign_20_stride_0, update = v_state_39_cast_fp16, x = coreml_update_state_93)[name = string("value_cache_internal_tensor_assign_20_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_20_cast_fp16, input = value_cache)[name = string("coreml_update_state_95_write_state")]; tensor coreml_update_state_95 = read_state(input = value_cache)[name = string("coreml_update_state_95")]; tensor var_4223_begin_0 = const()[name = string("op_4223_begin_0"), val = tensor([19, 0, 0, 0, 0])]; tensor var_4223_end_0 = const()[name = string("op_4223_end_0"), val = tensor([20, 1, 8, 2048, 128])]; tensor var_4223_end_mask_0 = const()[name = string("op_4223_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4223_squeeze_mask_0 = const()[name = string("op_4223_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_4223_cast_fp16 = slice_by_index(begin = var_4223_begin_0, end = var_4223_end_0, end_mask = var_4223_end_mask_0, squeeze_mask = var_4223_squeeze_mask_0, x = coreml_update_state_94)[name = string("op_4223_cast_fp16")]; tensor var_4226_begin_0 = const()[name = string("op_4226_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4226_end_mask_0 = const()[name = string("op_4226_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4226_cast_fp16 = slice_by_index(begin = var_4226_begin_0, end = concat_12, end_mask = var_4226_end_mask_0, x = var_4223_cast_fp16)[name = string("op_4226_cast_fp16")]; tensor var_4228_begin_0 = const()[name = string("op_4228_begin_0"), val = tensor([19, 0, 0, 0, 0])]; tensor var_4228_end_0 = const()[name = string("op_4228_end_0"), val = tensor([20, 1, 8, 2048, 128])]; tensor var_4228_end_mask_0 = const()[name = string("op_4228_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4228_squeeze_mask_0 = const()[name = string("op_4228_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_4228_cast_fp16 = slice_by_index(begin = var_4228_begin_0, end = var_4228_end_0, end_mask = var_4228_end_mask_0, squeeze_mask = var_4228_squeeze_mask_0, x = coreml_update_state_95)[name = string("op_4228_cast_fp16")]; tensor var_4231_begin_0 = const()[name = string("op_4231_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4231_end_mask_0 = const()[name = string("op_4231_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4231_cast_fp16 = slice_by_index(begin = var_4231_begin_0, end = concat_12, end_mask = var_4231_end_mask_0, x = var_4228_cast_fp16)[name = string("op_4231_cast_fp16")]; tensor var_4233_shape_cast_fp16 = shape(x = var_4226_cast_fp16)[name = string("op_4233_shape_cast_fp16")]; int32 gather_355 = const()[name = string("gather_355"), val = int32(1)]; int32 gather_356 = const()[name = string("gather_356"), val = int32(8)]; int32 gather_357_axis_0 = const()[name = string("gather_357_axis_0"), val = int32(0)]; int32 gather_357_batch_dims_0 = const()[name = string("gather_357_batch_dims_0"), val = int32(0)]; bool gather_357_validate_indices_0 = const()[name = string("gather_357_validate_indices_0"), val = bool(false)]; string var_4233_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4233_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_357_to_uint16 = const()[name = string("select_357_to_uint16"), val = uint16(2)]; tensor var_4233_shape_cast_fp16_to_uint16 = cast(dtype = var_4233_shape_cast_fp16_to_uint16_dtype_0, x = var_4233_shape_cast_fp16)[name = string("cast_584")]; uint16 gather_357_cast_uint16 = gather(axis = gather_357_axis_0, batch_dims = gather_357_batch_dims_0, indices = select_357_to_uint16, validate_indices = gather_357_validate_indices_0, x = var_4233_shape_cast_fp16_to_uint16)[name = string("gather_357_cast_uint16")]; string gather_357_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_357_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_358 = const()[name = string("gather_358"), val = int32(128)]; tensor var_4240_axes_0 = const()[name = string("op_4240_axes_0"), val = tensor([2])]; tensor var_4240_cast_fp16 = expand_dims(axes = var_4240_axes_0, x = var_4226_cast_fp16)[name = string("op_4240_cast_fp16")]; int32 concat_375_axis_0 = const()[name = string("concat_375_axis_0"), val = int32(0)]; bool concat_375_interleave_0 = const()[name = string("concat_375_interleave_0"), val = bool(false)]; int32 gather_357_cast_uint16_to_int32 = cast(dtype = gather_357_cast_uint16_to_int32_dtype_0, x = gather_357_cast_uint16)[name = string("cast_583")]; tensor concat_375 = concat(axis = concat_375_axis_0, interleave = concat_375_interleave_0, values = (gather_355, gather_356, var_78, gather_357_cast_uint16_to_int32, gather_358))[name = string("concat_375")]; tensor shape_397_cast_fp16 = shape(x = var_4240_cast_fp16)[name = string("shape_397_cast_fp16")]; tensor real_div_38 = real_div(x = concat_375, y = shape_397_cast_fp16)[name = string("real_div_38")]; tensor hidden_states_901_cast_fp16 = tile(reps = real_div_38, x = var_4240_cast_fp16)[name = string("hidden_states_901_cast_fp16")]; tensor concat_376x = const()[name = string("concat_376x"), val = tensor([1, 16, -1, 128])]; tensor key_77_cast_fp16 = reshape(shape = concat_376x, x = hidden_states_901_cast_fp16)[name = string("key_77_cast_fp16")]; tensor var_4250_shape_cast_fp16 = shape(x = var_4231_cast_fp16)[name = string("op_4250_shape_cast_fp16")]; int32 gather_359 = const()[name = string("gather_359"), val = int32(1)]; int32 gather_360 = const()[name = string("gather_360"), val = int32(8)]; int32 gather_361_axis_0 = const()[name = string("gather_361_axis_0"), val = int32(0)]; int32 gather_361_batch_dims_0 = const()[name = string("gather_361_batch_dims_0"), val = int32(0)]; bool gather_361_validate_indices_0 = const()[name = string("gather_361_validate_indices_0"), val = bool(false)]; string var_4250_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4250_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_361_to_uint16 = const()[name = string("select_361_to_uint16"), val = uint16(2)]; tensor var_4250_shape_cast_fp16_to_uint16 = cast(dtype = var_4250_shape_cast_fp16_to_uint16_dtype_0, x = var_4250_shape_cast_fp16)[name = string("cast_582")]; uint16 gather_361_cast_uint16 = gather(axis = gather_361_axis_0, batch_dims = gather_361_batch_dims_0, indices = select_361_to_uint16, validate_indices = gather_361_validate_indices_0, x = var_4250_shape_cast_fp16_to_uint16)[name = string("gather_361_cast_uint16")]; string gather_361_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_361_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_362 = const()[name = string("gather_362"), val = int32(128)]; tensor var_4257_axes_0 = const()[name = string("op_4257_axes_0"), val = tensor([2])]; tensor var_4257_cast_fp16 = expand_dims(axes = var_4257_axes_0, x = var_4231_cast_fp16)[name = string("op_4257_cast_fp16")]; int32 concat_377_axis_0 = const()[name = string("concat_377_axis_0"), val = int32(0)]; bool concat_377_interleave_0 = const()[name = string("concat_377_interleave_0"), val = bool(false)]; int32 gather_361_cast_uint16_to_int32 = cast(dtype = gather_361_cast_uint16_to_int32_dtype_0, x = gather_361_cast_uint16)[name = string("cast_581")]; tensor concat_377 = concat(axis = concat_377_axis_0, interleave = concat_377_interleave_0, values = (gather_359, gather_360, var_78, gather_361_cast_uint16_to_int32, gather_362))[name = string("concat_377")]; tensor shape_402_cast_fp16 = shape(x = var_4257_cast_fp16)[name = string("shape_402_cast_fp16")]; tensor real_div_39 = real_div(x = concat_377, y = shape_402_cast_fp16)[name = string("real_div_39")]; tensor hidden_states_905_cast_fp16 = tile(reps = real_div_39, x = var_4257_cast_fp16)[name = string("hidden_states_905_cast_fp16")]; tensor concat_378x = const()[name = string("concat_378x"), val = tensor([1, 16, -1, 128])]; tensor value_77_cast_fp16 = reshape(shape = concat_378x, x = hidden_states_905_cast_fp16)[name = string("value_77_cast_fp16")]; tensor var_4267_shape_cast_fp16 = shape(x = key_77_cast_fp16)[name = string("op_4267_shape_cast_fp16")]; int32 gather_363_axis_0 = const()[name = string("gather_363_axis_0"), val = int32(0)]; int32 gather_363_batch_dims_0 = const()[name = string("gather_363_batch_dims_0"), val = int32(0)]; bool gather_363_validate_indices_0 = const()[name = string("gather_363_validate_indices_0"), val = bool(false)]; string var_4267_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4267_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_363_to_uint16 = const()[name = string("select_363_to_uint16"), val = uint16(2)]; tensor var_4267_shape_cast_fp16_to_uint16 = cast(dtype = var_4267_shape_cast_fp16_to_uint16_dtype_0, x = var_4267_shape_cast_fp16)[name = string("cast_580")]; uint16 gather_363_cast_uint16 = gather(axis = gather_363_axis_0, batch_dims = gather_363_batch_dims_0, indices = select_363_to_uint16, validate_indices = gather_363_validate_indices_0, x = var_4267_shape_cast_fp16_to_uint16)[name = string("gather_363_cast_uint16")]; string gather_363_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_363_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_379_values0_0 = const()[name = string("concat_379_values0_0"), val = int32(1)]; int32 concat_379_values1_0 = const()[name = string("concat_379_values1_0"), val = int32(1)]; int32 concat_379_values2_0 = const()[name = string("concat_379_values2_0"), val = int32(0)]; int32 concat_379_axis_0 = const()[name = string("concat_379_axis_0"), val = int32(0)]; bool concat_379_interleave_0 = const()[name = string("concat_379_interleave_0"), val = bool(false)]; int32 gather_363_cast_uint16_to_int32 = cast(dtype = gather_363_cast_uint16_to_int32_dtype_0, x = gather_363_cast_uint16)[name = string("cast_579")]; tensor concat_379 = concat(axis = concat_379_axis_0, interleave = concat_379_interleave_0, values = (concat_379_values0_0, concat_379_values1_0, concat_379_values2_0, gather_363_cast_uint16_to_int32))[name = string("concat_379")]; tensor attention_mask_39_begin_0 = const()[name = string("attention_mask_39_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_39_end_mask_0 = const()[name = string("attention_mask_39_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_39_cast_fp16 = slice_by_index(begin = attention_mask_39_begin_0, end = concat_379, end_mask = attention_mask_39_end_mask_0, x = causal_mask)[name = string("attention_mask_39_cast_fp16")]; tensor mul_19_cast_fp16 = mul(x = query_77_cast_fp16, y = var_85_to_fp16)[name = string("mul_19_cast_fp16")]; bool matmul_19_transpose_y_0 = const()[name = string("matmul_19_transpose_y_0"), val = bool(true)]; bool matmul_19_transpose_x_0 = const()[name = string("matmul_19_transpose_x_0"), val = bool(false)]; tensor matmul_19_cast_fp16 = matmul(transpose_x = matmul_19_transpose_x_0, transpose_y = matmul_19_transpose_y_0, x = mul_19_cast_fp16, y = key_77_cast_fp16)[name = string("matmul_19_cast_fp16")]; tensor add_383_cast_fp16 = add(x = matmul_19_cast_fp16, y = attention_mask_39_cast_fp16)[name = string("add_383_cast_fp16")]; int32 softmax_19_axis_0 = const()[name = string("softmax_19_axis_0"), val = int32(-1)]; tensor softmax_19_cast_fp16 = softmax(axis = softmax_19_axis_0, x = add_383_cast_fp16)[name = string("softmax_19_cast_fp16")]; bool attn_output_77_transpose_x_0 = const()[name = string("attn_output_77_transpose_x_0"), val = bool(false)]; bool attn_output_77_transpose_y_0 = const()[name = string("attn_output_77_transpose_y_0"), val = bool(false)]; tensor attn_output_77_cast_fp16 = matmul(transpose_x = attn_output_77_transpose_x_0, transpose_y = attn_output_77_transpose_y_0, x = softmax_19_cast_fp16, y = value_77_cast_fp16)[name = string("attn_output_77_cast_fp16")]; tensor var_4276_perm_0 = const()[name = string("op_4276_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_380_axis_0 = const()[name = string("concat_380_axis_0"), val = int32(0)]; bool concat_380_interleave_0 = const()[name = string("concat_380_interleave_0"), val = bool(false)]; int32 gather_347_cast_uint16_to_int32 = cast(dtype = gather_347_cast_uint16_to_int32_dtype_0, x = gather_347_cast_uint16)[name = string("cast_585")]; tensor concat_380 = concat(axis = concat_380_axis_0, interleave = concat_380_interleave_0, values = (gather_346, gather_347_cast_uint16_to_int32, var_72))[name = string("concat_380")]; tensor var_4276_cast_fp16 = transpose(perm = var_4276_perm_0, x = attn_output_77_cast_fp16)[name = string("transpose_32")]; tensor var_4279_cast_fp16 = reshape(shape = concat_380, x = var_4276_cast_fp16)[name = string("op_4279_cast_fp16")]; tensor model_model_layers_19_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_19_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(917358528)))]; tensor linear_136_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_19_self_attn_o_proj_weight_to_fp16, x = var_4279_cast_fp16)[name = string("linear_136_cast_fp16")]; tensor hidden_states_909_cast_fp16 = add(x = hidden_states_873_cast_fp16, y = linear_136_cast_fp16)[name = string("hidden_states_909_cast_fp16")]; fp16 var_78_promoted_79_to_fp16 = const()[name = string("op_78_promoted_79_to_fp16"), val = fp16(0x1p+1)]; tensor var_4286_cast_fp16 = pow(x = hidden_states_909_cast_fp16, y = var_78_promoted_79_to_fp16)[name = string("op_4286_cast_fp16")]; tensor variance_159_axes_0 = const()[name = string("variance_159_axes_0"), val = tensor([-1])]; bool variance_159_keep_dims_0 = const()[name = string("variance_159_keep_dims_0"), val = bool(true)]; tensor variance_159_cast_fp16 = reduce_mean(axes = variance_159_axes_0, keep_dims = variance_159_keep_dims_0, x = var_4286_cast_fp16)[name = string("variance_159_cast_fp16")]; fp16 var_4289_to_fp16 = const()[name = string("op_4289_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4290_cast_fp16 = add(x = variance_159_cast_fp16, y = var_4289_to_fp16)[name = string("op_4290_cast_fp16")]; fp32 var_4291_epsilon_0 = const()[name = string("op_4291_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4291_cast_fp16 = rsqrt(epsilon = var_4291_epsilon_0, x = var_4290_cast_fp16)[name = string("op_4291_cast_fp16")]; tensor hidden_states_913_cast_fp16 = mul(x = hidden_states_909_cast_fp16, y = var_4291_cast_fp16)[name = string("hidden_states_913_cast_fp16")]; tensor model_model_layers_19_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_19_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(921552896)))]; tensor input_155_cast_fp16 = mul(x = model_model_layers_19_post_attention_layernorm_weight_to_fp16, y = hidden_states_913_cast_fp16)[name = string("input_155_cast_fp16")]; tensor model_model_layers_19_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_19_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(921555008)))]; tensor linear_137_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_19_mlp_gate_proj_weight_to_fp16, x = input_155_cast_fp16)[name = string("linear_137_cast_fp16")]; tensor var_4303_cast_fp16 = silu(x = linear_137_cast_fp16)[name = string("op_4303_cast_fp16")]; tensor model_model_layers_19_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_19_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(927846528)))]; tensor linear_138_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_19_mlp_up_proj_weight_to_fp16, x = input_155_cast_fp16)[name = string("linear_138_cast_fp16")]; tensor input_159_cast_fp16 = mul(x = var_4303_cast_fp16, y = linear_138_cast_fp16)[name = string("input_159_cast_fp16")]; tensor model_model_layers_19_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_19_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(934138048)))]; tensor linear_139_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_19_mlp_down_proj_weight_to_fp16, x = input_159_cast_fp16)[name = string("linear_139_cast_fp16")]; tensor hidden_states_919_cast_fp16 = add(x = hidden_states_909_cast_fp16, y = linear_139_cast_fp16)[name = string("hidden_states_919_cast_fp16")]; fp16 var_78_promoted_80_to_fp16 = const()[name = string("op_78_promoted_80_to_fp16"), val = fp16(0x1p+1)]; tensor var_4316_cast_fp16 = pow(x = hidden_states_919_cast_fp16, y = var_78_promoted_80_to_fp16)[name = string("op_4316_cast_fp16")]; tensor variance_161_axes_0 = const()[name = string("variance_161_axes_0"), val = tensor([-1])]; bool variance_161_keep_dims_0 = const()[name = string("variance_161_keep_dims_0"), val = bool(true)]; tensor variance_161_cast_fp16 = reduce_mean(axes = variance_161_axes_0, keep_dims = variance_161_keep_dims_0, x = var_4316_cast_fp16)[name = string("variance_161_cast_fp16")]; fp16 var_4319_to_fp16 = const()[name = string("op_4319_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4320_cast_fp16 = add(x = variance_161_cast_fp16, y = var_4319_to_fp16)[name = string("op_4320_cast_fp16")]; fp32 var_4321_epsilon_0 = const()[name = string("op_4321_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4321_cast_fp16 = rsqrt(epsilon = var_4321_epsilon_0, x = var_4320_cast_fp16)[name = string("op_4321_cast_fp16")]; tensor hidden_states_923_cast_fp16 = mul(x = hidden_states_919_cast_fp16, y = var_4321_cast_fp16)[name = string("hidden_states_923_cast_fp16")]; tensor model_model_layers_20_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(940429568)))]; tensor hidden_states_927_cast_fp16 = mul(x = model_model_layers_20_input_layernorm_weight_to_fp16, y = hidden_states_923_cast_fp16)[name = string("hidden_states_927_cast_fp16")]; tensor var_4334_shape_cast_fp16 = shape(x = hidden_states_927_cast_fp16)[name = string("op_4334_shape_cast_fp16")]; int32 gather_364 = const()[name = string("gather_364"), val = int32(1)]; int32 gather_365_axis_0 = const()[name = string("gather_365_axis_0"), val = int32(0)]; int32 gather_365_batch_dims_0 = const()[name = string("gather_365_batch_dims_0"), val = int32(0)]; bool gather_365_validate_indices_0 = const()[name = string("gather_365_validate_indices_0"), val = bool(false)]; string var_4334_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4334_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_365_to_uint16 = const()[name = string("select_365_to_uint16"), val = uint16(1)]; tensor var_4334_shape_cast_fp16_to_uint16 = cast(dtype = var_4334_shape_cast_fp16_to_uint16_dtype_0, x = var_4334_shape_cast_fp16)[name = string("cast_578")]; uint16 gather_365_cast_uint16 = gather(axis = gather_365_axis_0, batch_dims = gather_365_batch_dims_0, indices = select_365_to_uint16, validate_indices = gather_365_validate_indices_0, x = var_4334_shape_cast_fp16_to_uint16)[name = string("gather_365_cast_uint16")]; string gather_365_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_365_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_20_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_20_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(940431680)))]; tensor linear_140_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_20_self_attn_q_proj_weight_to_fp16, x = hidden_states_927_cast_fp16)[name = string("linear_140_cast_fp16")]; tensor concat_381x = const()[name = string("concat_381x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_929_cast_fp16 = reshape(shape = concat_381x, x = linear_140_cast_fp16)[name = string("hidden_states_929_cast_fp16")]; fp16 var_78_promoted_81_to_fp16 = const()[name = string("op_78_promoted_81_to_fp16"), val = fp16(0x1p+1)]; tensor var_4342_cast_fp16 = pow(x = hidden_states_929_cast_fp16, y = var_78_promoted_81_to_fp16)[name = string("op_4342_cast_fp16")]; tensor variance_163_axes_0 = const()[name = string("variance_163_axes_0"), val = tensor([-1])]; bool variance_163_keep_dims_0 = const()[name = string("variance_163_keep_dims_0"), val = bool(true)]; tensor variance_163_cast_fp16 = reduce_mean(axes = variance_163_axes_0, keep_dims = variance_163_keep_dims_0, x = var_4342_cast_fp16)[name = string("variance_163_cast_fp16")]; fp16 var_4345_to_fp16 = const()[name = string("op_4345_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4346_cast_fp16 = add(x = variance_163_cast_fp16, y = var_4345_to_fp16)[name = string("op_4346_cast_fp16")]; fp32 var_4347_epsilon_0 = const()[name = string("op_4347_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4347_cast_fp16 = rsqrt(epsilon = var_4347_epsilon_0, x = var_4346_cast_fp16)[name = string("op_4347_cast_fp16")]; tensor hidden_states_933_cast_fp16 = mul(x = hidden_states_929_cast_fp16, y = var_4347_cast_fp16)[name = string("hidden_states_933_cast_fp16")]; tensor model_model_layers_20_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_20_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(944626048)))]; tensor var_4350_cast_fp16 = mul(x = model_model_layers_20_self_attn_q_norm_weight_to_fp16, y = hidden_states_933_cast_fp16)[name = string("op_4350_cast_fp16")]; tensor q_41_perm_0 = const()[name = string("q_41_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_20_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_20_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(944626368)))]; tensor linear_141_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_20_self_attn_k_proj_weight_to_fp16, x = hidden_states_927_cast_fp16)[name = string("linear_141_cast_fp16")]; tensor concat_382x = const()[name = string("concat_382x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_937_cast_fp16 = reshape(shape = concat_382x, x = linear_141_cast_fp16)[name = string("hidden_states_937_cast_fp16")]; fp16 var_78_promoted_82_to_fp16 = const()[name = string("op_78_promoted_82_to_fp16"), val = fp16(0x1p+1)]; tensor var_4358_cast_fp16 = pow(x = hidden_states_937_cast_fp16, y = var_78_promoted_82_to_fp16)[name = string("op_4358_cast_fp16")]; tensor variance_165_axes_0 = const()[name = string("variance_165_axes_0"), val = tensor([-1])]; bool variance_165_keep_dims_0 = const()[name = string("variance_165_keep_dims_0"), val = bool(true)]; tensor variance_165_cast_fp16 = reduce_mean(axes = variance_165_axes_0, keep_dims = variance_165_keep_dims_0, x = var_4358_cast_fp16)[name = string("variance_165_cast_fp16")]; fp16 var_4361_to_fp16 = const()[name = string("op_4361_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4362_cast_fp16 = add(x = variance_165_cast_fp16, y = var_4361_to_fp16)[name = string("op_4362_cast_fp16")]; fp32 var_4363_epsilon_0 = const()[name = string("op_4363_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4363_cast_fp16 = rsqrt(epsilon = var_4363_epsilon_0, x = var_4362_cast_fp16)[name = string("op_4363_cast_fp16")]; tensor hidden_states_941_cast_fp16 = mul(x = hidden_states_937_cast_fp16, y = var_4363_cast_fp16)[name = string("hidden_states_941_cast_fp16")]; tensor model_model_layers_20_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_20_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(946723584)))]; tensor var_4366_cast_fp16 = mul(x = model_model_layers_20_self_attn_k_norm_weight_to_fp16, y = hidden_states_941_cast_fp16)[name = string("op_4366_cast_fp16")]; tensor k_41_perm_0 = const()[name = string("k_41_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_20_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_20_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(946723904)))]; tensor linear_142_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_20_self_attn_v_proj_weight_to_fp16, x = hidden_states_927_cast_fp16)[name = string("linear_142_cast_fp16")]; tensor concat_383x = const()[name = string("concat_383x"), val = tensor([1, -1, 8, 128])]; tensor var_4371_cast_fp16 = reshape(shape = concat_383x, x = linear_142_cast_fp16)[name = string("op_4371_cast_fp16")]; tensor v_state_41_perm_0 = const()[name = string("v_state_41_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_41_cast_fp16 = transpose(perm = q_41_perm_0, x = var_4350_cast_fp16)[name = string("transpose_31")]; tensor var_4375_cast_fp16 = mul(x = q_41_cast_fp16, y = cos_5_cast_fp16)[name = string("op_4375_cast_fp16")]; tensor x1_81_begin_0 = const()[name = string("x1_81_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_81_end_0 = const()[name = string("x1_81_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_81_end_mask_0 = const()[name = string("x1_81_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_81_cast_fp16 = slice_by_index(begin = x1_81_begin_0, end = x1_81_end_0, end_mask = x1_81_end_mask_0, x = q_41_cast_fp16)[name = string("x1_81_cast_fp16")]; tensor x2_81_begin_0 = const()[name = string("x2_81_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_81_end_0 = const()[name = string("x2_81_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_81_end_mask_0 = const()[name = string("x2_81_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_81_cast_fp16 = slice_by_index(begin = x2_81_begin_0, end = x2_81_end_0, end_mask = x2_81_end_mask_0, x = q_41_cast_fp16)[name = string("x2_81_cast_fp16")]; fp16 const_44_promoted_to_fp16 = const()[name = string("const_44_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4386_cast_fp16 = mul(x = x2_81_cast_fp16, y = const_44_promoted_to_fp16)[name = string("op_4386_cast_fp16")]; bool var_4388_interleave_0 = const()[name = string("op_4388_interleave_0"), val = bool(false)]; tensor var_4388_cast_fp16 = concat(axis = var_72, interleave = var_4388_interleave_0, values = (var_4386_cast_fp16, x1_81_cast_fp16))[name = string("op_4388_cast_fp16")]; tensor var_4389_cast_fp16 = mul(x = var_4388_cast_fp16, y = sin_5_cast_fp16)[name = string("op_4389_cast_fp16")]; tensor query_81_cast_fp16 = add(x = var_4375_cast_fp16, y = var_4389_cast_fp16)[name = string("query_81_cast_fp16")]; tensor k_41_cast_fp16 = transpose(perm = k_41_perm_0, x = var_4366_cast_fp16)[name = string("transpose_30")]; tensor var_4391_cast_fp16 = mul(x = k_41_cast_fp16, y = cos_5_cast_fp16)[name = string("op_4391_cast_fp16")]; tensor x1_83_begin_0 = const()[name = string("x1_83_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_83_end_0 = const()[name = string("x1_83_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_83_end_mask_0 = const()[name = string("x1_83_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_83_cast_fp16 = slice_by_index(begin = x1_83_begin_0, end = x1_83_end_0, end_mask = x1_83_end_mask_0, x = k_41_cast_fp16)[name = string("x1_83_cast_fp16")]; tensor x2_83_begin_0 = const()[name = string("x2_83_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_83_end_0 = const()[name = string("x2_83_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_83_end_mask_0 = const()[name = string("x2_83_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_83_cast_fp16 = slice_by_index(begin = x2_83_begin_0, end = x2_83_end_0, end_mask = x2_83_end_mask_0, x = k_41_cast_fp16)[name = string("x2_83_cast_fp16")]; fp16 const_45_promoted_to_fp16 = const()[name = string("const_45_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4402_cast_fp16 = mul(x = x2_83_cast_fp16, y = const_45_promoted_to_fp16)[name = string("op_4402_cast_fp16")]; bool var_4404_interleave_0 = const()[name = string("op_4404_interleave_0"), val = bool(false)]; tensor var_4404_cast_fp16 = concat(axis = var_72, interleave = var_4404_interleave_0, values = (var_4402_cast_fp16, x1_83_cast_fp16))[name = string("op_4404_cast_fp16")]; tensor var_4405_cast_fp16 = mul(x = var_4404_cast_fp16, y = sin_5_cast_fp16)[name = string("op_4405_cast_fp16")]; tensor k_state_41_cast_fp16 = add(x = var_4391_cast_fp16, y = var_4405_cast_fp16)[name = string("k_state_41_cast_fp16")]; tensor expand_dims_240 = const()[name = string("expand_dims_240"), val = tensor([0])]; tensor expand_dims_241 = const()[name = string("expand_dims_241"), val = tensor([0])]; tensor expand_dims_243 = const()[name = string("expand_dims_243"), val = tensor([0])]; tensor concat_386_values0_0 = const()[name = string("concat_386_values0_0"), val = tensor([20])]; int32 concat_386_axis_0 = const()[name = string("concat_386_axis_0"), val = int32(0)]; bool concat_386_interleave_0 = const()[name = string("concat_386_interleave_0"), val = bool(false)]; tensor concat_386 = concat(axis = concat_386_axis_0, interleave = concat_386_interleave_0, values = (concat_386_values0_0, expand_dims_240, expand_dims_241, expand_dims_2, expand_dims_243))[name = string("concat_386")]; tensor key_cache_internal_tensor_assign_21_stride_0 = const()[name = string("key_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_386, begin_mask = key_cache_internal_tensor_assign_21_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_21_squeeze_mask_0, stride = key_cache_internal_tensor_assign_21_stride_0, update = k_state_41_cast_fp16, x = coreml_update_state_94)[name = string("key_cache_internal_tensor_assign_21_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_21_cast_fp16, input = key_cache)[name = string("coreml_update_state_96_write_state")]; tensor coreml_update_state_96 = read_state(input = key_cache)[name = string("coreml_update_state_96")]; tensor value_cache_internal_tensor_assign_21_stride_0 = const()[name = string("value_cache_internal_tensor_assign_21_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_21_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_21_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_21_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_21_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_41_cast_fp16 = transpose(perm = v_state_41_perm_0, x = var_4371_cast_fp16)[name = string("transpose_29")]; tensor value_cache_internal_tensor_assign_21_cast_fp16 = slice_update(begin = concat_386, begin_mask = value_cache_internal_tensor_assign_21_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_21_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_21_squeeze_mask_0, stride = value_cache_internal_tensor_assign_21_stride_0, update = v_state_41_cast_fp16, x = coreml_update_state_95)[name = string("value_cache_internal_tensor_assign_21_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_21_cast_fp16, input = value_cache)[name = string("coreml_update_state_97_write_state")]; tensor coreml_update_state_97 = read_state(input = value_cache)[name = string("coreml_update_state_97")]; tensor var_4428_begin_0 = const()[name = string("op_4428_begin_0"), val = tensor([20, 0, 0, 0, 0])]; tensor var_4428_end_0 = const()[name = string("op_4428_end_0"), val = tensor([21, 1, 8, 2048, 128])]; tensor var_4428_end_mask_0 = const()[name = string("op_4428_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4428_squeeze_mask_0 = const()[name = string("op_4428_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_4428_cast_fp16 = slice_by_index(begin = var_4428_begin_0, end = var_4428_end_0, end_mask = var_4428_end_mask_0, squeeze_mask = var_4428_squeeze_mask_0, x = coreml_update_state_96)[name = string("op_4428_cast_fp16")]; tensor var_4431_begin_0 = const()[name = string("op_4431_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4431_end_mask_0 = const()[name = string("op_4431_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4431_cast_fp16 = slice_by_index(begin = var_4431_begin_0, end = concat_12, end_mask = var_4431_end_mask_0, x = var_4428_cast_fp16)[name = string("op_4431_cast_fp16")]; tensor var_4433_begin_0 = const()[name = string("op_4433_begin_0"), val = tensor([20, 0, 0, 0, 0])]; tensor var_4433_end_0 = const()[name = string("op_4433_end_0"), val = tensor([21, 1, 8, 2048, 128])]; tensor var_4433_end_mask_0 = const()[name = string("op_4433_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4433_squeeze_mask_0 = const()[name = string("op_4433_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_4433_cast_fp16 = slice_by_index(begin = var_4433_begin_0, end = var_4433_end_0, end_mask = var_4433_end_mask_0, squeeze_mask = var_4433_squeeze_mask_0, x = coreml_update_state_97)[name = string("op_4433_cast_fp16")]; tensor var_4436_begin_0 = const()[name = string("op_4436_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4436_end_mask_0 = const()[name = string("op_4436_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4436_cast_fp16 = slice_by_index(begin = var_4436_begin_0, end = concat_12, end_mask = var_4436_end_mask_0, x = var_4433_cast_fp16)[name = string("op_4436_cast_fp16")]; tensor var_4438_shape_cast_fp16 = shape(x = var_4431_cast_fp16)[name = string("op_4438_shape_cast_fp16")]; int32 gather_373 = const()[name = string("gather_373"), val = int32(1)]; int32 gather_374 = const()[name = string("gather_374"), val = int32(8)]; int32 gather_375_axis_0 = const()[name = string("gather_375_axis_0"), val = int32(0)]; int32 gather_375_batch_dims_0 = const()[name = string("gather_375_batch_dims_0"), val = int32(0)]; bool gather_375_validate_indices_0 = const()[name = string("gather_375_validate_indices_0"), val = bool(false)]; string var_4438_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4438_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_375_to_uint16 = const()[name = string("select_375_to_uint16"), val = uint16(2)]; tensor var_4438_shape_cast_fp16_to_uint16 = cast(dtype = var_4438_shape_cast_fp16_to_uint16_dtype_0, x = var_4438_shape_cast_fp16)[name = string("cast_576")]; uint16 gather_375_cast_uint16 = gather(axis = gather_375_axis_0, batch_dims = gather_375_batch_dims_0, indices = select_375_to_uint16, validate_indices = gather_375_validate_indices_0, x = var_4438_shape_cast_fp16_to_uint16)[name = string("gather_375_cast_uint16")]; string gather_375_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_375_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_376 = const()[name = string("gather_376"), val = int32(128)]; tensor var_4445_axes_0 = const()[name = string("op_4445_axes_0"), val = tensor([2])]; tensor var_4445_cast_fp16 = expand_dims(axes = var_4445_axes_0, x = var_4431_cast_fp16)[name = string("op_4445_cast_fp16")]; int32 concat_394_axis_0 = const()[name = string("concat_394_axis_0"), val = int32(0)]; bool concat_394_interleave_0 = const()[name = string("concat_394_interleave_0"), val = bool(false)]; int32 gather_375_cast_uint16_to_int32 = cast(dtype = gather_375_cast_uint16_to_int32_dtype_0, x = gather_375_cast_uint16)[name = string("cast_575")]; tensor concat_394 = concat(axis = concat_394_axis_0, interleave = concat_394_interleave_0, values = (gather_373, gather_374, var_78, gather_375_cast_uint16_to_int32, gather_376))[name = string("concat_394")]; tensor shape_417_cast_fp16 = shape(x = var_4445_cast_fp16)[name = string("shape_417_cast_fp16")]; tensor real_div_40 = real_div(x = concat_394, y = shape_417_cast_fp16)[name = string("real_div_40")]; tensor hidden_states_947_cast_fp16 = tile(reps = real_div_40, x = var_4445_cast_fp16)[name = string("hidden_states_947_cast_fp16")]; tensor concat_395x = const()[name = string("concat_395x"), val = tensor([1, 16, -1, 128])]; tensor key_81_cast_fp16 = reshape(shape = concat_395x, x = hidden_states_947_cast_fp16)[name = string("key_81_cast_fp16")]; tensor var_4455_shape_cast_fp16 = shape(x = var_4436_cast_fp16)[name = string("op_4455_shape_cast_fp16")]; int32 gather_377 = const()[name = string("gather_377"), val = int32(1)]; int32 gather_378 = const()[name = string("gather_378"), val = int32(8)]; int32 gather_379_axis_0 = const()[name = string("gather_379_axis_0"), val = int32(0)]; int32 gather_379_batch_dims_0 = const()[name = string("gather_379_batch_dims_0"), val = int32(0)]; bool gather_379_validate_indices_0 = const()[name = string("gather_379_validate_indices_0"), val = bool(false)]; string var_4455_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4455_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_379_to_uint16 = const()[name = string("select_379_to_uint16"), val = uint16(2)]; tensor var_4455_shape_cast_fp16_to_uint16 = cast(dtype = var_4455_shape_cast_fp16_to_uint16_dtype_0, x = var_4455_shape_cast_fp16)[name = string("cast_574")]; uint16 gather_379_cast_uint16 = gather(axis = gather_379_axis_0, batch_dims = gather_379_batch_dims_0, indices = select_379_to_uint16, validate_indices = gather_379_validate_indices_0, x = var_4455_shape_cast_fp16_to_uint16)[name = string("gather_379_cast_uint16")]; string gather_379_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_379_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_380 = const()[name = string("gather_380"), val = int32(128)]; tensor var_4462_axes_0 = const()[name = string("op_4462_axes_0"), val = tensor([2])]; tensor var_4462_cast_fp16 = expand_dims(axes = var_4462_axes_0, x = var_4436_cast_fp16)[name = string("op_4462_cast_fp16")]; int32 concat_396_axis_0 = const()[name = string("concat_396_axis_0"), val = int32(0)]; bool concat_396_interleave_0 = const()[name = string("concat_396_interleave_0"), val = bool(false)]; int32 gather_379_cast_uint16_to_int32 = cast(dtype = gather_379_cast_uint16_to_int32_dtype_0, x = gather_379_cast_uint16)[name = string("cast_573")]; tensor concat_396 = concat(axis = concat_396_axis_0, interleave = concat_396_interleave_0, values = (gather_377, gather_378, var_78, gather_379_cast_uint16_to_int32, gather_380))[name = string("concat_396")]; tensor shape_422_cast_fp16 = shape(x = var_4462_cast_fp16)[name = string("shape_422_cast_fp16")]; tensor real_div_41 = real_div(x = concat_396, y = shape_422_cast_fp16)[name = string("real_div_41")]; tensor hidden_states_951_cast_fp16 = tile(reps = real_div_41, x = var_4462_cast_fp16)[name = string("hidden_states_951_cast_fp16")]; tensor concat_397x = const()[name = string("concat_397x"), val = tensor([1, 16, -1, 128])]; tensor value_81_cast_fp16 = reshape(shape = concat_397x, x = hidden_states_951_cast_fp16)[name = string("value_81_cast_fp16")]; tensor var_4472_shape_cast_fp16 = shape(x = key_81_cast_fp16)[name = string("op_4472_shape_cast_fp16")]; int32 gather_381_axis_0 = const()[name = string("gather_381_axis_0"), val = int32(0)]; int32 gather_381_batch_dims_0 = const()[name = string("gather_381_batch_dims_0"), val = int32(0)]; bool gather_381_validate_indices_0 = const()[name = string("gather_381_validate_indices_0"), val = bool(false)]; string var_4472_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4472_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_381_to_uint16 = const()[name = string("select_381_to_uint16"), val = uint16(2)]; tensor var_4472_shape_cast_fp16_to_uint16 = cast(dtype = var_4472_shape_cast_fp16_to_uint16_dtype_0, x = var_4472_shape_cast_fp16)[name = string("cast_572")]; uint16 gather_381_cast_uint16 = gather(axis = gather_381_axis_0, batch_dims = gather_381_batch_dims_0, indices = select_381_to_uint16, validate_indices = gather_381_validate_indices_0, x = var_4472_shape_cast_fp16_to_uint16)[name = string("gather_381_cast_uint16")]; string gather_381_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_381_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_398_values0_0 = const()[name = string("concat_398_values0_0"), val = int32(1)]; int32 concat_398_values1_0 = const()[name = string("concat_398_values1_0"), val = int32(1)]; int32 concat_398_values2_0 = const()[name = string("concat_398_values2_0"), val = int32(0)]; int32 concat_398_axis_0 = const()[name = string("concat_398_axis_0"), val = int32(0)]; bool concat_398_interleave_0 = const()[name = string("concat_398_interleave_0"), val = bool(false)]; int32 gather_381_cast_uint16_to_int32 = cast(dtype = gather_381_cast_uint16_to_int32_dtype_0, x = gather_381_cast_uint16)[name = string("cast_571")]; tensor concat_398 = concat(axis = concat_398_axis_0, interleave = concat_398_interleave_0, values = (concat_398_values0_0, concat_398_values1_0, concat_398_values2_0, gather_381_cast_uint16_to_int32))[name = string("concat_398")]; tensor attention_mask_41_begin_0 = const()[name = string("attention_mask_41_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_41_end_mask_0 = const()[name = string("attention_mask_41_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_41_cast_fp16 = slice_by_index(begin = attention_mask_41_begin_0, end = concat_398, end_mask = attention_mask_41_end_mask_0, x = causal_mask)[name = string("attention_mask_41_cast_fp16")]; tensor mul_20_cast_fp16 = mul(x = query_81_cast_fp16, y = var_85_to_fp16)[name = string("mul_20_cast_fp16")]; bool matmul_20_transpose_y_0 = const()[name = string("matmul_20_transpose_y_0"), val = bool(true)]; bool matmul_20_transpose_x_0 = const()[name = string("matmul_20_transpose_x_0"), val = bool(false)]; tensor matmul_20_cast_fp16 = matmul(transpose_x = matmul_20_transpose_x_0, transpose_y = matmul_20_transpose_y_0, x = mul_20_cast_fp16, y = key_81_cast_fp16)[name = string("matmul_20_cast_fp16")]; tensor add_402_cast_fp16 = add(x = matmul_20_cast_fp16, y = attention_mask_41_cast_fp16)[name = string("add_402_cast_fp16")]; int32 softmax_20_axis_0 = const()[name = string("softmax_20_axis_0"), val = int32(-1)]; tensor softmax_20_cast_fp16 = softmax(axis = softmax_20_axis_0, x = add_402_cast_fp16)[name = string("softmax_20_cast_fp16")]; bool attn_output_81_transpose_x_0 = const()[name = string("attn_output_81_transpose_x_0"), val = bool(false)]; bool attn_output_81_transpose_y_0 = const()[name = string("attn_output_81_transpose_y_0"), val = bool(false)]; tensor attn_output_81_cast_fp16 = matmul(transpose_x = attn_output_81_transpose_x_0, transpose_y = attn_output_81_transpose_y_0, x = softmax_20_cast_fp16, y = value_81_cast_fp16)[name = string("attn_output_81_cast_fp16")]; tensor var_4481_perm_0 = const()[name = string("op_4481_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_399_axis_0 = const()[name = string("concat_399_axis_0"), val = int32(0)]; bool concat_399_interleave_0 = const()[name = string("concat_399_interleave_0"), val = bool(false)]; int32 gather_365_cast_uint16_to_int32 = cast(dtype = gather_365_cast_uint16_to_int32_dtype_0, x = gather_365_cast_uint16)[name = string("cast_577")]; tensor concat_399 = concat(axis = concat_399_axis_0, interleave = concat_399_interleave_0, values = (gather_364, gather_365_cast_uint16_to_int32, var_72))[name = string("concat_399")]; tensor var_4481_cast_fp16 = transpose(perm = var_4481_perm_0, x = attn_output_81_cast_fp16)[name = string("transpose_28")]; tensor var_4484_cast_fp16 = reshape(shape = concat_399, x = var_4481_cast_fp16)[name = string("op_4484_cast_fp16")]; tensor model_model_layers_20_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_20_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(948821120)))]; tensor linear_143_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_20_self_attn_o_proj_weight_to_fp16, x = var_4484_cast_fp16)[name = string("linear_143_cast_fp16")]; tensor hidden_states_955_cast_fp16 = add(x = hidden_states_919_cast_fp16, y = linear_143_cast_fp16)[name = string("hidden_states_955_cast_fp16")]; fp16 var_78_promoted_83_to_fp16 = const()[name = string("op_78_promoted_83_to_fp16"), val = fp16(0x1p+1)]; tensor var_4491_cast_fp16 = pow(x = hidden_states_955_cast_fp16, y = var_78_promoted_83_to_fp16)[name = string("op_4491_cast_fp16")]; tensor variance_167_axes_0 = const()[name = string("variance_167_axes_0"), val = tensor([-1])]; bool variance_167_keep_dims_0 = const()[name = string("variance_167_keep_dims_0"), val = bool(true)]; tensor variance_167_cast_fp16 = reduce_mean(axes = variance_167_axes_0, keep_dims = variance_167_keep_dims_0, x = var_4491_cast_fp16)[name = string("variance_167_cast_fp16")]; fp16 var_4494_to_fp16 = const()[name = string("op_4494_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4495_cast_fp16 = add(x = variance_167_cast_fp16, y = var_4494_to_fp16)[name = string("op_4495_cast_fp16")]; fp32 var_4496_epsilon_0 = const()[name = string("op_4496_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4496_cast_fp16 = rsqrt(epsilon = var_4496_epsilon_0, x = var_4495_cast_fp16)[name = string("op_4496_cast_fp16")]; tensor hidden_states_959_cast_fp16 = mul(x = hidden_states_955_cast_fp16, y = var_4496_cast_fp16)[name = string("hidden_states_959_cast_fp16")]; tensor model_model_layers_20_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_20_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(953015488)))]; tensor input_163_cast_fp16 = mul(x = model_model_layers_20_post_attention_layernorm_weight_to_fp16, y = hidden_states_959_cast_fp16)[name = string("input_163_cast_fp16")]; tensor model_model_layers_20_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_20_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(953017600)))]; tensor linear_144_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_20_mlp_gate_proj_weight_to_fp16, x = input_163_cast_fp16)[name = string("linear_144_cast_fp16")]; tensor var_4508_cast_fp16 = silu(x = linear_144_cast_fp16)[name = string("op_4508_cast_fp16")]; tensor model_model_layers_20_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_20_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(959309120)))]; tensor linear_145_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_20_mlp_up_proj_weight_to_fp16, x = input_163_cast_fp16)[name = string("linear_145_cast_fp16")]; tensor input_167_cast_fp16 = mul(x = var_4508_cast_fp16, y = linear_145_cast_fp16)[name = string("input_167_cast_fp16")]; tensor model_model_layers_20_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_20_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(965600640)))]; tensor linear_146_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_20_mlp_down_proj_weight_to_fp16, x = input_167_cast_fp16)[name = string("linear_146_cast_fp16")]; tensor hidden_states_965_cast_fp16 = add(x = hidden_states_955_cast_fp16, y = linear_146_cast_fp16)[name = string("hidden_states_965_cast_fp16")]; fp16 var_78_promoted_84_to_fp16 = const()[name = string("op_78_promoted_84_to_fp16"), val = fp16(0x1p+1)]; tensor var_4521_cast_fp16 = pow(x = hidden_states_965_cast_fp16, y = var_78_promoted_84_to_fp16)[name = string("op_4521_cast_fp16")]; tensor variance_169_axes_0 = const()[name = string("variance_169_axes_0"), val = tensor([-1])]; bool variance_169_keep_dims_0 = const()[name = string("variance_169_keep_dims_0"), val = bool(true)]; tensor variance_169_cast_fp16 = reduce_mean(axes = variance_169_axes_0, keep_dims = variance_169_keep_dims_0, x = var_4521_cast_fp16)[name = string("variance_169_cast_fp16")]; fp16 var_4524_to_fp16 = const()[name = string("op_4524_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4525_cast_fp16 = add(x = variance_169_cast_fp16, y = var_4524_to_fp16)[name = string("op_4525_cast_fp16")]; fp32 var_4526_epsilon_0 = const()[name = string("op_4526_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4526_cast_fp16 = rsqrt(epsilon = var_4526_epsilon_0, x = var_4525_cast_fp16)[name = string("op_4526_cast_fp16")]; tensor hidden_states_969_cast_fp16 = mul(x = hidden_states_965_cast_fp16, y = var_4526_cast_fp16)[name = string("hidden_states_969_cast_fp16")]; tensor model_model_layers_21_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(971892160)))]; tensor hidden_states_973_cast_fp16 = mul(x = model_model_layers_21_input_layernorm_weight_to_fp16, y = hidden_states_969_cast_fp16)[name = string("hidden_states_973_cast_fp16")]; tensor var_4539_shape_cast_fp16 = shape(x = hidden_states_973_cast_fp16)[name = string("op_4539_shape_cast_fp16")]; int32 gather_382 = const()[name = string("gather_382"), val = int32(1)]; int32 gather_383_axis_0 = const()[name = string("gather_383_axis_0"), val = int32(0)]; int32 gather_383_batch_dims_0 = const()[name = string("gather_383_batch_dims_0"), val = int32(0)]; bool gather_383_validate_indices_0 = const()[name = string("gather_383_validate_indices_0"), val = bool(false)]; string var_4539_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4539_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_383_to_uint16 = const()[name = string("select_383_to_uint16"), val = uint16(1)]; tensor var_4539_shape_cast_fp16_to_uint16 = cast(dtype = var_4539_shape_cast_fp16_to_uint16_dtype_0, x = var_4539_shape_cast_fp16)[name = string("cast_570")]; uint16 gather_383_cast_uint16 = gather(axis = gather_383_axis_0, batch_dims = gather_383_batch_dims_0, indices = select_383_to_uint16, validate_indices = gather_383_validate_indices_0, x = var_4539_shape_cast_fp16_to_uint16)[name = string("gather_383_cast_uint16")]; string gather_383_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_383_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_21_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_21_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(971894272)))]; tensor linear_147_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_21_self_attn_q_proj_weight_to_fp16, x = hidden_states_973_cast_fp16)[name = string("linear_147_cast_fp16")]; tensor concat_400x = const()[name = string("concat_400x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_975_cast_fp16 = reshape(shape = concat_400x, x = linear_147_cast_fp16)[name = string("hidden_states_975_cast_fp16")]; fp16 var_78_promoted_85_to_fp16 = const()[name = string("op_78_promoted_85_to_fp16"), val = fp16(0x1p+1)]; tensor var_4547_cast_fp16 = pow(x = hidden_states_975_cast_fp16, y = var_78_promoted_85_to_fp16)[name = string("op_4547_cast_fp16")]; tensor variance_171_axes_0 = const()[name = string("variance_171_axes_0"), val = tensor([-1])]; bool variance_171_keep_dims_0 = const()[name = string("variance_171_keep_dims_0"), val = bool(true)]; tensor variance_171_cast_fp16 = reduce_mean(axes = variance_171_axes_0, keep_dims = variance_171_keep_dims_0, x = var_4547_cast_fp16)[name = string("variance_171_cast_fp16")]; fp16 var_4550_to_fp16 = const()[name = string("op_4550_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4551_cast_fp16 = add(x = variance_171_cast_fp16, y = var_4550_to_fp16)[name = string("op_4551_cast_fp16")]; fp32 var_4552_epsilon_0 = const()[name = string("op_4552_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4552_cast_fp16 = rsqrt(epsilon = var_4552_epsilon_0, x = var_4551_cast_fp16)[name = string("op_4552_cast_fp16")]; tensor hidden_states_979_cast_fp16 = mul(x = hidden_states_975_cast_fp16, y = var_4552_cast_fp16)[name = string("hidden_states_979_cast_fp16")]; tensor model_model_layers_21_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_21_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(976088640)))]; tensor var_4555_cast_fp16 = mul(x = model_model_layers_21_self_attn_q_norm_weight_to_fp16, y = hidden_states_979_cast_fp16)[name = string("op_4555_cast_fp16")]; tensor q_43_perm_0 = const()[name = string("q_43_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_21_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_21_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(976088960)))]; tensor linear_148_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_21_self_attn_k_proj_weight_to_fp16, x = hidden_states_973_cast_fp16)[name = string("linear_148_cast_fp16")]; tensor concat_401x = const()[name = string("concat_401x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_983_cast_fp16 = reshape(shape = concat_401x, x = linear_148_cast_fp16)[name = string("hidden_states_983_cast_fp16")]; fp16 var_78_promoted_86_to_fp16 = const()[name = string("op_78_promoted_86_to_fp16"), val = fp16(0x1p+1)]; tensor var_4563_cast_fp16 = pow(x = hidden_states_983_cast_fp16, y = var_78_promoted_86_to_fp16)[name = string("op_4563_cast_fp16")]; tensor variance_173_axes_0 = const()[name = string("variance_173_axes_0"), val = tensor([-1])]; bool variance_173_keep_dims_0 = const()[name = string("variance_173_keep_dims_0"), val = bool(true)]; tensor variance_173_cast_fp16 = reduce_mean(axes = variance_173_axes_0, keep_dims = variance_173_keep_dims_0, x = var_4563_cast_fp16)[name = string("variance_173_cast_fp16")]; fp16 var_4566_to_fp16 = const()[name = string("op_4566_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4567_cast_fp16 = add(x = variance_173_cast_fp16, y = var_4566_to_fp16)[name = string("op_4567_cast_fp16")]; fp32 var_4568_epsilon_0 = const()[name = string("op_4568_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4568_cast_fp16 = rsqrt(epsilon = var_4568_epsilon_0, x = var_4567_cast_fp16)[name = string("op_4568_cast_fp16")]; tensor hidden_states_987_cast_fp16 = mul(x = hidden_states_983_cast_fp16, y = var_4568_cast_fp16)[name = string("hidden_states_987_cast_fp16")]; tensor model_model_layers_21_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_21_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(978186176)))]; tensor var_4571_cast_fp16 = mul(x = model_model_layers_21_self_attn_k_norm_weight_to_fp16, y = hidden_states_987_cast_fp16)[name = string("op_4571_cast_fp16")]; tensor k_43_perm_0 = const()[name = string("k_43_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_21_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_21_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(978186496)))]; tensor linear_149_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_21_self_attn_v_proj_weight_to_fp16, x = hidden_states_973_cast_fp16)[name = string("linear_149_cast_fp16")]; tensor concat_402x = const()[name = string("concat_402x"), val = tensor([1, -1, 8, 128])]; tensor var_4576_cast_fp16 = reshape(shape = concat_402x, x = linear_149_cast_fp16)[name = string("op_4576_cast_fp16")]; tensor v_state_43_perm_0 = const()[name = string("v_state_43_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_43_cast_fp16 = transpose(perm = q_43_perm_0, x = var_4555_cast_fp16)[name = string("transpose_27")]; tensor var_4580_cast_fp16 = mul(x = q_43_cast_fp16, y = cos_5_cast_fp16)[name = string("op_4580_cast_fp16")]; tensor x1_85_begin_0 = const()[name = string("x1_85_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_85_end_0 = const()[name = string("x1_85_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_85_end_mask_0 = const()[name = string("x1_85_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_85_cast_fp16 = slice_by_index(begin = x1_85_begin_0, end = x1_85_end_0, end_mask = x1_85_end_mask_0, x = q_43_cast_fp16)[name = string("x1_85_cast_fp16")]; tensor x2_85_begin_0 = const()[name = string("x2_85_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_85_end_0 = const()[name = string("x2_85_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_85_end_mask_0 = const()[name = string("x2_85_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_85_cast_fp16 = slice_by_index(begin = x2_85_begin_0, end = x2_85_end_0, end_mask = x2_85_end_mask_0, x = q_43_cast_fp16)[name = string("x2_85_cast_fp16")]; fp16 const_46_promoted_to_fp16 = const()[name = string("const_46_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4591_cast_fp16 = mul(x = x2_85_cast_fp16, y = const_46_promoted_to_fp16)[name = string("op_4591_cast_fp16")]; bool var_4593_interleave_0 = const()[name = string("op_4593_interleave_0"), val = bool(false)]; tensor var_4593_cast_fp16 = concat(axis = var_72, interleave = var_4593_interleave_0, values = (var_4591_cast_fp16, x1_85_cast_fp16))[name = string("op_4593_cast_fp16")]; tensor var_4594_cast_fp16 = mul(x = var_4593_cast_fp16, y = sin_5_cast_fp16)[name = string("op_4594_cast_fp16")]; tensor query_85_cast_fp16 = add(x = var_4580_cast_fp16, y = var_4594_cast_fp16)[name = string("query_85_cast_fp16")]; tensor k_43_cast_fp16 = transpose(perm = k_43_perm_0, x = var_4571_cast_fp16)[name = string("transpose_26")]; tensor var_4596_cast_fp16 = mul(x = k_43_cast_fp16, y = cos_5_cast_fp16)[name = string("op_4596_cast_fp16")]; tensor x1_87_begin_0 = const()[name = string("x1_87_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_87_end_0 = const()[name = string("x1_87_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_87_end_mask_0 = const()[name = string("x1_87_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_87_cast_fp16 = slice_by_index(begin = x1_87_begin_0, end = x1_87_end_0, end_mask = x1_87_end_mask_0, x = k_43_cast_fp16)[name = string("x1_87_cast_fp16")]; tensor x2_87_begin_0 = const()[name = string("x2_87_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_87_end_0 = const()[name = string("x2_87_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_87_end_mask_0 = const()[name = string("x2_87_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_87_cast_fp16 = slice_by_index(begin = x2_87_begin_0, end = x2_87_end_0, end_mask = x2_87_end_mask_0, x = k_43_cast_fp16)[name = string("x2_87_cast_fp16")]; fp16 const_47_promoted_to_fp16 = const()[name = string("const_47_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4607_cast_fp16 = mul(x = x2_87_cast_fp16, y = const_47_promoted_to_fp16)[name = string("op_4607_cast_fp16")]; bool var_4609_interleave_0 = const()[name = string("op_4609_interleave_0"), val = bool(false)]; tensor var_4609_cast_fp16 = concat(axis = var_72, interleave = var_4609_interleave_0, values = (var_4607_cast_fp16, x1_87_cast_fp16))[name = string("op_4609_cast_fp16")]; tensor var_4610_cast_fp16 = mul(x = var_4609_cast_fp16, y = sin_5_cast_fp16)[name = string("op_4610_cast_fp16")]; tensor k_state_43_cast_fp16 = add(x = var_4596_cast_fp16, y = var_4610_cast_fp16)[name = string("k_state_43_cast_fp16")]; tensor expand_dims_252 = const()[name = string("expand_dims_252"), val = tensor([0])]; tensor expand_dims_253 = const()[name = string("expand_dims_253"), val = tensor([0])]; tensor expand_dims_255 = const()[name = string("expand_dims_255"), val = tensor([0])]; tensor concat_405_values0_0 = const()[name = string("concat_405_values0_0"), val = tensor([21])]; int32 concat_405_axis_0 = const()[name = string("concat_405_axis_0"), val = int32(0)]; bool concat_405_interleave_0 = const()[name = string("concat_405_interleave_0"), val = bool(false)]; tensor concat_405 = concat(axis = concat_405_axis_0, interleave = concat_405_interleave_0, values = (concat_405_values0_0, expand_dims_252, expand_dims_253, expand_dims_2, expand_dims_255))[name = string("concat_405")]; tensor key_cache_internal_tensor_assign_22_stride_0 = const()[name = string("key_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_405, begin_mask = key_cache_internal_tensor_assign_22_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_22_squeeze_mask_0, stride = key_cache_internal_tensor_assign_22_stride_0, update = k_state_43_cast_fp16, x = coreml_update_state_96)[name = string("key_cache_internal_tensor_assign_22_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_22_cast_fp16, input = key_cache)[name = string("coreml_update_state_98_write_state")]; tensor coreml_update_state_98 = read_state(input = key_cache)[name = string("coreml_update_state_98")]; tensor value_cache_internal_tensor_assign_22_stride_0 = const()[name = string("value_cache_internal_tensor_assign_22_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_22_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_22_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_22_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_22_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_43_cast_fp16 = transpose(perm = v_state_43_perm_0, x = var_4576_cast_fp16)[name = string("transpose_25")]; tensor value_cache_internal_tensor_assign_22_cast_fp16 = slice_update(begin = concat_405, begin_mask = value_cache_internal_tensor_assign_22_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_22_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_22_squeeze_mask_0, stride = value_cache_internal_tensor_assign_22_stride_0, update = v_state_43_cast_fp16, x = coreml_update_state_97)[name = string("value_cache_internal_tensor_assign_22_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_22_cast_fp16, input = value_cache)[name = string("coreml_update_state_99_write_state")]; tensor coreml_update_state_99 = read_state(input = value_cache)[name = string("coreml_update_state_99")]; tensor var_4633_begin_0 = const()[name = string("op_4633_begin_0"), val = tensor([21, 0, 0, 0, 0])]; tensor var_4633_end_0 = const()[name = string("op_4633_end_0"), val = tensor([22, 1, 8, 2048, 128])]; tensor var_4633_end_mask_0 = const()[name = string("op_4633_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4633_squeeze_mask_0 = const()[name = string("op_4633_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_4633_cast_fp16 = slice_by_index(begin = var_4633_begin_0, end = var_4633_end_0, end_mask = var_4633_end_mask_0, squeeze_mask = var_4633_squeeze_mask_0, x = coreml_update_state_98)[name = string("op_4633_cast_fp16")]; tensor var_4636_begin_0 = const()[name = string("op_4636_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4636_end_mask_0 = const()[name = string("op_4636_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4636_cast_fp16 = slice_by_index(begin = var_4636_begin_0, end = concat_12, end_mask = var_4636_end_mask_0, x = var_4633_cast_fp16)[name = string("op_4636_cast_fp16")]; tensor var_4638_begin_0 = const()[name = string("op_4638_begin_0"), val = tensor([21, 0, 0, 0, 0])]; tensor var_4638_end_0 = const()[name = string("op_4638_end_0"), val = tensor([22, 1, 8, 2048, 128])]; tensor var_4638_end_mask_0 = const()[name = string("op_4638_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4638_squeeze_mask_0 = const()[name = string("op_4638_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_4638_cast_fp16 = slice_by_index(begin = var_4638_begin_0, end = var_4638_end_0, end_mask = var_4638_end_mask_0, squeeze_mask = var_4638_squeeze_mask_0, x = coreml_update_state_99)[name = string("op_4638_cast_fp16")]; tensor var_4641_begin_0 = const()[name = string("op_4641_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4641_end_mask_0 = const()[name = string("op_4641_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4641_cast_fp16 = slice_by_index(begin = var_4641_begin_0, end = concat_12, end_mask = var_4641_end_mask_0, x = var_4638_cast_fp16)[name = string("op_4641_cast_fp16")]; tensor var_4643_shape_cast_fp16 = shape(x = var_4636_cast_fp16)[name = string("op_4643_shape_cast_fp16")]; int32 gather_391 = const()[name = string("gather_391"), val = int32(1)]; int32 gather_392 = const()[name = string("gather_392"), val = int32(8)]; int32 gather_393_axis_0 = const()[name = string("gather_393_axis_0"), val = int32(0)]; int32 gather_393_batch_dims_0 = const()[name = string("gather_393_batch_dims_0"), val = int32(0)]; bool gather_393_validate_indices_0 = const()[name = string("gather_393_validate_indices_0"), val = bool(false)]; string var_4643_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4643_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_393_to_uint16 = const()[name = string("select_393_to_uint16"), val = uint16(2)]; tensor var_4643_shape_cast_fp16_to_uint16 = cast(dtype = var_4643_shape_cast_fp16_to_uint16_dtype_0, x = var_4643_shape_cast_fp16)[name = string("cast_568")]; uint16 gather_393_cast_uint16 = gather(axis = gather_393_axis_0, batch_dims = gather_393_batch_dims_0, indices = select_393_to_uint16, validate_indices = gather_393_validate_indices_0, x = var_4643_shape_cast_fp16_to_uint16)[name = string("gather_393_cast_uint16")]; string gather_393_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_393_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_394 = const()[name = string("gather_394"), val = int32(128)]; tensor var_4650_axes_0 = const()[name = string("op_4650_axes_0"), val = tensor([2])]; tensor var_4650_cast_fp16 = expand_dims(axes = var_4650_axes_0, x = var_4636_cast_fp16)[name = string("op_4650_cast_fp16")]; int32 concat_413_axis_0 = const()[name = string("concat_413_axis_0"), val = int32(0)]; bool concat_413_interleave_0 = const()[name = string("concat_413_interleave_0"), val = bool(false)]; int32 gather_393_cast_uint16_to_int32 = cast(dtype = gather_393_cast_uint16_to_int32_dtype_0, x = gather_393_cast_uint16)[name = string("cast_567")]; tensor concat_413 = concat(axis = concat_413_axis_0, interleave = concat_413_interleave_0, values = (gather_391, gather_392, var_78, gather_393_cast_uint16_to_int32, gather_394))[name = string("concat_413")]; tensor shape_437_cast_fp16 = shape(x = var_4650_cast_fp16)[name = string("shape_437_cast_fp16")]; tensor real_div_42 = real_div(x = concat_413, y = shape_437_cast_fp16)[name = string("real_div_42")]; tensor hidden_states_993_cast_fp16 = tile(reps = real_div_42, x = var_4650_cast_fp16)[name = string("hidden_states_993_cast_fp16")]; tensor concat_414x = const()[name = string("concat_414x"), val = tensor([1, 16, -1, 128])]; tensor key_85_cast_fp16 = reshape(shape = concat_414x, x = hidden_states_993_cast_fp16)[name = string("key_85_cast_fp16")]; tensor var_4660_shape_cast_fp16 = shape(x = var_4641_cast_fp16)[name = string("op_4660_shape_cast_fp16")]; int32 gather_395 = const()[name = string("gather_395"), val = int32(1)]; int32 gather_396 = const()[name = string("gather_396"), val = int32(8)]; int32 gather_397_axis_0 = const()[name = string("gather_397_axis_0"), val = int32(0)]; int32 gather_397_batch_dims_0 = const()[name = string("gather_397_batch_dims_0"), val = int32(0)]; bool gather_397_validate_indices_0 = const()[name = string("gather_397_validate_indices_0"), val = bool(false)]; string var_4660_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4660_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_397_to_uint16 = const()[name = string("select_397_to_uint16"), val = uint16(2)]; tensor var_4660_shape_cast_fp16_to_uint16 = cast(dtype = var_4660_shape_cast_fp16_to_uint16_dtype_0, x = var_4660_shape_cast_fp16)[name = string("cast_566")]; uint16 gather_397_cast_uint16 = gather(axis = gather_397_axis_0, batch_dims = gather_397_batch_dims_0, indices = select_397_to_uint16, validate_indices = gather_397_validate_indices_0, x = var_4660_shape_cast_fp16_to_uint16)[name = string("gather_397_cast_uint16")]; string gather_397_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_397_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_398 = const()[name = string("gather_398"), val = int32(128)]; tensor var_4667_axes_0 = const()[name = string("op_4667_axes_0"), val = tensor([2])]; tensor var_4667_cast_fp16 = expand_dims(axes = var_4667_axes_0, x = var_4641_cast_fp16)[name = string("op_4667_cast_fp16")]; int32 concat_415_axis_0 = const()[name = string("concat_415_axis_0"), val = int32(0)]; bool concat_415_interleave_0 = const()[name = string("concat_415_interleave_0"), val = bool(false)]; int32 gather_397_cast_uint16_to_int32 = cast(dtype = gather_397_cast_uint16_to_int32_dtype_0, x = gather_397_cast_uint16)[name = string("cast_565")]; tensor concat_415 = concat(axis = concat_415_axis_0, interleave = concat_415_interleave_0, values = (gather_395, gather_396, var_78, gather_397_cast_uint16_to_int32, gather_398))[name = string("concat_415")]; tensor shape_442_cast_fp16 = shape(x = var_4667_cast_fp16)[name = string("shape_442_cast_fp16")]; tensor real_div_43 = real_div(x = concat_415, y = shape_442_cast_fp16)[name = string("real_div_43")]; tensor hidden_states_997_cast_fp16 = tile(reps = real_div_43, x = var_4667_cast_fp16)[name = string("hidden_states_997_cast_fp16")]; tensor concat_416x = const()[name = string("concat_416x"), val = tensor([1, 16, -1, 128])]; tensor value_85_cast_fp16 = reshape(shape = concat_416x, x = hidden_states_997_cast_fp16)[name = string("value_85_cast_fp16")]; tensor var_4677_shape_cast_fp16 = shape(x = key_85_cast_fp16)[name = string("op_4677_shape_cast_fp16")]; int32 gather_399_axis_0 = const()[name = string("gather_399_axis_0"), val = int32(0)]; int32 gather_399_batch_dims_0 = const()[name = string("gather_399_batch_dims_0"), val = int32(0)]; bool gather_399_validate_indices_0 = const()[name = string("gather_399_validate_indices_0"), val = bool(false)]; string var_4677_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4677_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_399_to_uint16 = const()[name = string("select_399_to_uint16"), val = uint16(2)]; tensor var_4677_shape_cast_fp16_to_uint16 = cast(dtype = var_4677_shape_cast_fp16_to_uint16_dtype_0, x = var_4677_shape_cast_fp16)[name = string("cast_564")]; uint16 gather_399_cast_uint16 = gather(axis = gather_399_axis_0, batch_dims = gather_399_batch_dims_0, indices = select_399_to_uint16, validate_indices = gather_399_validate_indices_0, x = var_4677_shape_cast_fp16_to_uint16)[name = string("gather_399_cast_uint16")]; string gather_399_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_399_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_417_values0_0 = const()[name = string("concat_417_values0_0"), val = int32(1)]; int32 concat_417_values1_0 = const()[name = string("concat_417_values1_0"), val = int32(1)]; int32 concat_417_values2_0 = const()[name = string("concat_417_values2_0"), val = int32(0)]; int32 concat_417_axis_0 = const()[name = string("concat_417_axis_0"), val = int32(0)]; bool concat_417_interleave_0 = const()[name = string("concat_417_interleave_0"), val = bool(false)]; int32 gather_399_cast_uint16_to_int32 = cast(dtype = gather_399_cast_uint16_to_int32_dtype_0, x = gather_399_cast_uint16)[name = string("cast_563")]; tensor concat_417 = concat(axis = concat_417_axis_0, interleave = concat_417_interleave_0, values = (concat_417_values0_0, concat_417_values1_0, concat_417_values2_0, gather_399_cast_uint16_to_int32))[name = string("concat_417")]; tensor attention_mask_43_begin_0 = const()[name = string("attention_mask_43_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_43_end_mask_0 = const()[name = string("attention_mask_43_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_43_cast_fp16 = slice_by_index(begin = attention_mask_43_begin_0, end = concat_417, end_mask = attention_mask_43_end_mask_0, x = causal_mask)[name = string("attention_mask_43_cast_fp16")]; tensor mul_21_cast_fp16 = mul(x = query_85_cast_fp16, y = var_85_to_fp16)[name = string("mul_21_cast_fp16")]; bool matmul_21_transpose_y_0 = const()[name = string("matmul_21_transpose_y_0"), val = bool(true)]; bool matmul_21_transpose_x_0 = const()[name = string("matmul_21_transpose_x_0"), val = bool(false)]; tensor matmul_21_cast_fp16 = matmul(transpose_x = matmul_21_transpose_x_0, transpose_y = matmul_21_transpose_y_0, x = mul_21_cast_fp16, y = key_85_cast_fp16)[name = string("matmul_21_cast_fp16")]; tensor add_421_cast_fp16 = add(x = matmul_21_cast_fp16, y = attention_mask_43_cast_fp16)[name = string("add_421_cast_fp16")]; int32 softmax_21_axis_0 = const()[name = string("softmax_21_axis_0"), val = int32(-1)]; tensor softmax_21_cast_fp16 = softmax(axis = softmax_21_axis_0, x = add_421_cast_fp16)[name = string("softmax_21_cast_fp16")]; bool attn_output_85_transpose_x_0 = const()[name = string("attn_output_85_transpose_x_0"), val = bool(false)]; bool attn_output_85_transpose_y_0 = const()[name = string("attn_output_85_transpose_y_0"), val = bool(false)]; tensor attn_output_85_cast_fp16 = matmul(transpose_x = attn_output_85_transpose_x_0, transpose_y = attn_output_85_transpose_y_0, x = softmax_21_cast_fp16, y = value_85_cast_fp16)[name = string("attn_output_85_cast_fp16")]; tensor var_4686_perm_0 = const()[name = string("op_4686_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_418_axis_0 = const()[name = string("concat_418_axis_0"), val = int32(0)]; bool concat_418_interleave_0 = const()[name = string("concat_418_interleave_0"), val = bool(false)]; int32 gather_383_cast_uint16_to_int32 = cast(dtype = gather_383_cast_uint16_to_int32_dtype_0, x = gather_383_cast_uint16)[name = string("cast_569")]; tensor concat_418 = concat(axis = concat_418_axis_0, interleave = concat_418_interleave_0, values = (gather_382, gather_383_cast_uint16_to_int32, var_72))[name = string("concat_418")]; tensor var_4686_cast_fp16 = transpose(perm = var_4686_perm_0, x = attn_output_85_cast_fp16)[name = string("transpose_24")]; tensor var_4689_cast_fp16 = reshape(shape = concat_418, x = var_4686_cast_fp16)[name = string("op_4689_cast_fp16")]; tensor model_model_layers_21_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_21_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(980283712)))]; tensor linear_150_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_21_self_attn_o_proj_weight_to_fp16, x = var_4689_cast_fp16)[name = string("linear_150_cast_fp16")]; tensor hidden_states_1001_cast_fp16 = add(x = hidden_states_965_cast_fp16, y = linear_150_cast_fp16)[name = string("hidden_states_1001_cast_fp16")]; fp16 var_78_promoted_87_to_fp16 = const()[name = string("op_78_promoted_87_to_fp16"), val = fp16(0x1p+1)]; tensor var_4696_cast_fp16 = pow(x = hidden_states_1001_cast_fp16, y = var_78_promoted_87_to_fp16)[name = string("op_4696_cast_fp16")]; tensor variance_175_axes_0 = const()[name = string("variance_175_axes_0"), val = tensor([-1])]; bool variance_175_keep_dims_0 = const()[name = string("variance_175_keep_dims_0"), val = bool(true)]; tensor variance_175_cast_fp16 = reduce_mean(axes = variance_175_axes_0, keep_dims = variance_175_keep_dims_0, x = var_4696_cast_fp16)[name = string("variance_175_cast_fp16")]; fp16 var_4699_to_fp16 = const()[name = string("op_4699_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4700_cast_fp16 = add(x = variance_175_cast_fp16, y = var_4699_to_fp16)[name = string("op_4700_cast_fp16")]; fp32 var_4701_epsilon_0 = const()[name = string("op_4701_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4701_cast_fp16 = rsqrt(epsilon = var_4701_epsilon_0, x = var_4700_cast_fp16)[name = string("op_4701_cast_fp16")]; tensor hidden_states_1005_cast_fp16 = mul(x = hidden_states_1001_cast_fp16, y = var_4701_cast_fp16)[name = string("hidden_states_1005_cast_fp16")]; tensor model_model_layers_21_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_21_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984478080)))]; tensor input_171_cast_fp16 = mul(x = model_model_layers_21_post_attention_layernorm_weight_to_fp16, y = hidden_states_1005_cast_fp16)[name = string("input_171_cast_fp16")]; tensor model_model_layers_21_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_21_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(984480192)))]; tensor linear_151_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_21_mlp_gate_proj_weight_to_fp16, x = input_171_cast_fp16)[name = string("linear_151_cast_fp16")]; tensor var_4713_cast_fp16 = silu(x = linear_151_cast_fp16)[name = string("op_4713_cast_fp16")]; tensor model_model_layers_21_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_21_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(990771712)))]; tensor linear_152_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_21_mlp_up_proj_weight_to_fp16, x = input_171_cast_fp16)[name = string("linear_152_cast_fp16")]; tensor input_175_cast_fp16 = mul(x = var_4713_cast_fp16, y = linear_152_cast_fp16)[name = string("input_175_cast_fp16")]; tensor model_model_layers_21_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_21_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(997063232)))]; tensor linear_153_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_21_mlp_down_proj_weight_to_fp16, x = input_175_cast_fp16)[name = string("linear_153_cast_fp16")]; tensor hidden_states_1011_cast_fp16 = add(x = hidden_states_1001_cast_fp16, y = linear_153_cast_fp16)[name = string("hidden_states_1011_cast_fp16")]; fp16 var_78_promoted_88_to_fp16 = const()[name = string("op_78_promoted_88_to_fp16"), val = fp16(0x1p+1)]; tensor var_4726_cast_fp16 = pow(x = hidden_states_1011_cast_fp16, y = var_78_promoted_88_to_fp16)[name = string("op_4726_cast_fp16")]; tensor variance_177_axes_0 = const()[name = string("variance_177_axes_0"), val = tensor([-1])]; bool variance_177_keep_dims_0 = const()[name = string("variance_177_keep_dims_0"), val = bool(true)]; tensor variance_177_cast_fp16 = reduce_mean(axes = variance_177_axes_0, keep_dims = variance_177_keep_dims_0, x = var_4726_cast_fp16)[name = string("variance_177_cast_fp16")]; fp16 var_4729_to_fp16 = const()[name = string("op_4729_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4730_cast_fp16 = add(x = variance_177_cast_fp16, y = var_4729_to_fp16)[name = string("op_4730_cast_fp16")]; fp32 var_4731_epsilon_0 = const()[name = string("op_4731_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4731_cast_fp16 = rsqrt(epsilon = var_4731_epsilon_0, x = var_4730_cast_fp16)[name = string("op_4731_cast_fp16")]; tensor hidden_states_1015_cast_fp16 = mul(x = hidden_states_1011_cast_fp16, y = var_4731_cast_fp16)[name = string("hidden_states_1015_cast_fp16")]; tensor model_model_layers_22_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1003354752)))]; tensor hidden_states_1019_cast_fp16 = mul(x = model_model_layers_22_input_layernorm_weight_to_fp16, y = hidden_states_1015_cast_fp16)[name = string("hidden_states_1019_cast_fp16")]; tensor var_4744_shape_cast_fp16 = shape(x = hidden_states_1019_cast_fp16)[name = string("op_4744_shape_cast_fp16")]; int32 gather_400 = const()[name = string("gather_400"), val = int32(1)]; int32 gather_401_axis_0 = const()[name = string("gather_401_axis_0"), val = int32(0)]; int32 gather_401_batch_dims_0 = const()[name = string("gather_401_batch_dims_0"), val = int32(0)]; bool gather_401_validate_indices_0 = const()[name = string("gather_401_validate_indices_0"), val = bool(false)]; string var_4744_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4744_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_401_to_uint16 = const()[name = string("select_401_to_uint16"), val = uint16(1)]; tensor var_4744_shape_cast_fp16_to_uint16 = cast(dtype = var_4744_shape_cast_fp16_to_uint16_dtype_0, x = var_4744_shape_cast_fp16)[name = string("cast_562")]; uint16 gather_401_cast_uint16 = gather(axis = gather_401_axis_0, batch_dims = gather_401_batch_dims_0, indices = select_401_to_uint16, validate_indices = gather_401_validate_indices_0, x = var_4744_shape_cast_fp16_to_uint16)[name = string("gather_401_cast_uint16")]; string gather_401_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_401_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_22_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_22_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1003356864)))]; tensor linear_154_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_22_self_attn_q_proj_weight_to_fp16, x = hidden_states_1019_cast_fp16)[name = string("linear_154_cast_fp16")]; tensor concat_419x = const()[name = string("concat_419x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_1021_cast_fp16 = reshape(shape = concat_419x, x = linear_154_cast_fp16)[name = string("hidden_states_1021_cast_fp16")]; fp16 var_78_promoted_89_to_fp16 = const()[name = string("op_78_promoted_89_to_fp16"), val = fp16(0x1p+1)]; tensor var_4752_cast_fp16 = pow(x = hidden_states_1021_cast_fp16, y = var_78_promoted_89_to_fp16)[name = string("op_4752_cast_fp16")]; tensor variance_179_axes_0 = const()[name = string("variance_179_axes_0"), val = tensor([-1])]; bool variance_179_keep_dims_0 = const()[name = string("variance_179_keep_dims_0"), val = bool(true)]; tensor variance_179_cast_fp16 = reduce_mean(axes = variance_179_axes_0, keep_dims = variance_179_keep_dims_0, x = var_4752_cast_fp16)[name = string("variance_179_cast_fp16")]; fp16 var_4755_to_fp16 = const()[name = string("op_4755_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4756_cast_fp16 = add(x = variance_179_cast_fp16, y = var_4755_to_fp16)[name = string("op_4756_cast_fp16")]; fp32 var_4757_epsilon_0 = const()[name = string("op_4757_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4757_cast_fp16 = rsqrt(epsilon = var_4757_epsilon_0, x = var_4756_cast_fp16)[name = string("op_4757_cast_fp16")]; tensor hidden_states_1025_cast_fp16 = mul(x = hidden_states_1021_cast_fp16, y = var_4757_cast_fp16)[name = string("hidden_states_1025_cast_fp16")]; tensor model_model_layers_22_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_22_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1007551232)))]; tensor var_4760_cast_fp16 = mul(x = model_model_layers_22_self_attn_q_norm_weight_to_fp16, y = hidden_states_1025_cast_fp16)[name = string("op_4760_cast_fp16")]; tensor q_45_perm_0 = const()[name = string("q_45_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_22_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_22_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1007551552)))]; tensor linear_155_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_22_self_attn_k_proj_weight_to_fp16, x = hidden_states_1019_cast_fp16)[name = string("linear_155_cast_fp16")]; tensor concat_420x = const()[name = string("concat_420x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_1029_cast_fp16 = reshape(shape = concat_420x, x = linear_155_cast_fp16)[name = string("hidden_states_1029_cast_fp16")]; fp16 var_78_promoted_90_to_fp16 = const()[name = string("op_78_promoted_90_to_fp16"), val = fp16(0x1p+1)]; tensor var_4768_cast_fp16 = pow(x = hidden_states_1029_cast_fp16, y = var_78_promoted_90_to_fp16)[name = string("op_4768_cast_fp16")]; tensor variance_181_axes_0 = const()[name = string("variance_181_axes_0"), val = tensor([-1])]; bool variance_181_keep_dims_0 = const()[name = string("variance_181_keep_dims_0"), val = bool(true)]; tensor variance_181_cast_fp16 = reduce_mean(axes = variance_181_axes_0, keep_dims = variance_181_keep_dims_0, x = var_4768_cast_fp16)[name = string("variance_181_cast_fp16")]; fp16 var_4771_to_fp16 = const()[name = string("op_4771_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4772_cast_fp16 = add(x = variance_181_cast_fp16, y = var_4771_to_fp16)[name = string("op_4772_cast_fp16")]; fp32 var_4773_epsilon_0 = const()[name = string("op_4773_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4773_cast_fp16 = rsqrt(epsilon = var_4773_epsilon_0, x = var_4772_cast_fp16)[name = string("op_4773_cast_fp16")]; tensor hidden_states_1033_cast_fp16 = mul(x = hidden_states_1029_cast_fp16, y = var_4773_cast_fp16)[name = string("hidden_states_1033_cast_fp16")]; tensor model_model_layers_22_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_22_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1009648768)))]; tensor var_4776_cast_fp16 = mul(x = model_model_layers_22_self_attn_k_norm_weight_to_fp16, y = hidden_states_1033_cast_fp16)[name = string("op_4776_cast_fp16")]; tensor k_45_perm_0 = const()[name = string("k_45_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_22_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_22_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1009649088)))]; tensor linear_156_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_22_self_attn_v_proj_weight_to_fp16, x = hidden_states_1019_cast_fp16)[name = string("linear_156_cast_fp16")]; tensor concat_421x = const()[name = string("concat_421x"), val = tensor([1, -1, 8, 128])]; tensor var_4781_cast_fp16 = reshape(shape = concat_421x, x = linear_156_cast_fp16)[name = string("op_4781_cast_fp16")]; tensor v_state_45_perm_0 = const()[name = string("v_state_45_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_45_cast_fp16 = transpose(perm = q_45_perm_0, x = var_4760_cast_fp16)[name = string("transpose_23")]; tensor var_4785_cast_fp16 = mul(x = q_45_cast_fp16, y = cos_5_cast_fp16)[name = string("op_4785_cast_fp16")]; tensor x1_89_begin_0 = const()[name = string("x1_89_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_89_end_0 = const()[name = string("x1_89_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_89_end_mask_0 = const()[name = string("x1_89_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_89_cast_fp16 = slice_by_index(begin = x1_89_begin_0, end = x1_89_end_0, end_mask = x1_89_end_mask_0, x = q_45_cast_fp16)[name = string("x1_89_cast_fp16")]; tensor x2_89_begin_0 = const()[name = string("x2_89_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_89_end_0 = const()[name = string("x2_89_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_89_end_mask_0 = const()[name = string("x2_89_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_89_cast_fp16 = slice_by_index(begin = x2_89_begin_0, end = x2_89_end_0, end_mask = x2_89_end_mask_0, x = q_45_cast_fp16)[name = string("x2_89_cast_fp16")]; fp16 const_48_promoted_to_fp16 = const()[name = string("const_48_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4796_cast_fp16 = mul(x = x2_89_cast_fp16, y = const_48_promoted_to_fp16)[name = string("op_4796_cast_fp16")]; bool var_4798_interleave_0 = const()[name = string("op_4798_interleave_0"), val = bool(false)]; tensor var_4798_cast_fp16 = concat(axis = var_72, interleave = var_4798_interleave_0, values = (var_4796_cast_fp16, x1_89_cast_fp16))[name = string("op_4798_cast_fp16")]; tensor var_4799_cast_fp16 = mul(x = var_4798_cast_fp16, y = sin_5_cast_fp16)[name = string("op_4799_cast_fp16")]; tensor query_89_cast_fp16 = add(x = var_4785_cast_fp16, y = var_4799_cast_fp16)[name = string("query_89_cast_fp16")]; tensor k_45_cast_fp16 = transpose(perm = k_45_perm_0, x = var_4776_cast_fp16)[name = string("transpose_22")]; tensor var_4801_cast_fp16 = mul(x = k_45_cast_fp16, y = cos_5_cast_fp16)[name = string("op_4801_cast_fp16")]; tensor x1_91_begin_0 = const()[name = string("x1_91_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_91_end_0 = const()[name = string("x1_91_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_91_end_mask_0 = const()[name = string("x1_91_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_91_cast_fp16 = slice_by_index(begin = x1_91_begin_0, end = x1_91_end_0, end_mask = x1_91_end_mask_0, x = k_45_cast_fp16)[name = string("x1_91_cast_fp16")]; tensor x2_91_begin_0 = const()[name = string("x2_91_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_91_end_0 = const()[name = string("x2_91_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_91_end_mask_0 = const()[name = string("x2_91_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_91_cast_fp16 = slice_by_index(begin = x2_91_begin_0, end = x2_91_end_0, end_mask = x2_91_end_mask_0, x = k_45_cast_fp16)[name = string("x2_91_cast_fp16")]; fp16 const_49_promoted_to_fp16 = const()[name = string("const_49_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_4812_cast_fp16 = mul(x = x2_91_cast_fp16, y = const_49_promoted_to_fp16)[name = string("op_4812_cast_fp16")]; bool var_4814_interleave_0 = const()[name = string("op_4814_interleave_0"), val = bool(false)]; tensor var_4814_cast_fp16 = concat(axis = var_72, interleave = var_4814_interleave_0, values = (var_4812_cast_fp16, x1_91_cast_fp16))[name = string("op_4814_cast_fp16")]; tensor var_4815_cast_fp16 = mul(x = var_4814_cast_fp16, y = sin_5_cast_fp16)[name = string("op_4815_cast_fp16")]; tensor k_state_45_cast_fp16 = add(x = var_4801_cast_fp16, y = var_4815_cast_fp16)[name = string("k_state_45_cast_fp16")]; tensor expand_dims_264 = const()[name = string("expand_dims_264"), val = tensor([0])]; tensor expand_dims_265 = const()[name = string("expand_dims_265"), val = tensor([0])]; tensor expand_dims_267 = const()[name = string("expand_dims_267"), val = tensor([0])]; tensor concat_424_values0_0 = const()[name = string("concat_424_values0_0"), val = tensor([22])]; int32 concat_424_axis_0 = const()[name = string("concat_424_axis_0"), val = int32(0)]; bool concat_424_interleave_0 = const()[name = string("concat_424_interleave_0"), val = bool(false)]; tensor concat_424 = concat(axis = concat_424_axis_0, interleave = concat_424_interleave_0, values = (concat_424_values0_0, expand_dims_264, expand_dims_265, expand_dims_2, expand_dims_267))[name = string("concat_424")]; tensor key_cache_internal_tensor_assign_23_stride_0 = const()[name = string("key_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_424, begin_mask = key_cache_internal_tensor_assign_23_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_23_squeeze_mask_0, stride = key_cache_internal_tensor_assign_23_stride_0, update = k_state_45_cast_fp16, x = coreml_update_state_98)[name = string("key_cache_internal_tensor_assign_23_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_23_cast_fp16, input = key_cache)[name = string("coreml_update_state_100_write_state")]; tensor coreml_update_state_100 = read_state(input = key_cache)[name = string("coreml_update_state_100")]; tensor value_cache_internal_tensor_assign_23_stride_0 = const()[name = string("value_cache_internal_tensor_assign_23_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_23_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_23_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_23_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_23_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_45_cast_fp16 = transpose(perm = v_state_45_perm_0, x = var_4781_cast_fp16)[name = string("transpose_21")]; tensor value_cache_internal_tensor_assign_23_cast_fp16 = slice_update(begin = concat_424, begin_mask = value_cache_internal_tensor_assign_23_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_23_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_23_squeeze_mask_0, stride = value_cache_internal_tensor_assign_23_stride_0, update = v_state_45_cast_fp16, x = coreml_update_state_99)[name = string("value_cache_internal_tensor_assign_23_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_23_cast_fp16, input = value_cache)[name = string("coreml_update_state_101_write_state")]; tensor coreml_update_state_101 = read_state(input = value_cache)[name = string("coreml_update_state_101")]; tensor var_4838_begin_0 = const()[name = string("op_4838_begin_0"), val = tensor([22, 0, 0, 0, 0])]; tensor var_4838_end_0 = const()[name = string("op_4838_end_0"), val = tensor([23, 1, 8, 2048, 128])]; tensor var_4838_end_mask_0 = const()[name = string("op_4838_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4838_squeeze_mask_0 = const()[name = string("op_4838_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_4838_cast_fp16 = slice_by_index(begin = var_4838_begin_0, end = var_4838_end_0, end_mask = var_4838_end_mask_0, squeeze_mask = var_4838_squeeze_mask_0, x = coreml_update_state_100)[name = string("op_4838_cast_fp16")]; tensor var_4841_begin_0 = const()[name = string("op_4841_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4841_end_mask_0 = const()[name = string("op_4841_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4841_cast_fp16 = slice_by_index(begin = var_4841_begin_0, end = concat_12, end_mask = var_4841_end_mask_0, x = var_4838_cast_fp16)[name = string("op_4841_cast_fp16")]; tensor var_4843_begin_0 = const()[name = string("op_4843_begin_0"), val = tensor([22, 0, 0, 0, 0])]; tensor var_4843_end_0 = const()[name = string("op_4843_end_0"), val = tensor([23, 1, 8, 2048, 128])]; tensor var_4843_end_mask_0 = const()[name = string("op_4843_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_4843_squeeze_mask_0 = const()[name = string("op_4843_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_4843_cast_fp16 = slice_by_index(begin = var_4843_begin_0, end = var_4843_end_0, end_mask = var_4843_end_mask_0, squeeze_mask = var_4843_squeeze_mask_0, x = coreml_update_state_101)[name = string("op_4843_cast_fp16")]; tensor var_4846_begin_0 = const()[name = string("op_4846_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_4846_end_mask_0 = const()[name = string("op_4846_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_4846_cast_fp16 = slice_by_index(begin = var_4846_begin_0, end = concat_12, end_mask = var_4846_end_mask_0, x = var_4843_cast_fp16)[name = string("op_4846_cast_fp16")]; tensor var_4848_shape_cast_fp16 = shape(x = var_4841_cast_fp16)[name = string("op_4848_shape_cast_fp16")]; int32 gather_409 = const()[name = string("gather_409"), val = int32(1)]; int32 gather_410 = const()[name = string("gather_410"), val = int32(8)]; int32 gather_411_axis_0 = const()[name = string("gather_411_axis_0"), val = int32(0)]; int32 gather_411_batch_dims_0 = const()[name = string("gather_411_batch_dims_0"), val = int32(0)]; bool gather_411_validate_indices_0 = const()[name = string("gather_411_validate_indices_0"), val = bool(false)]; string var_4848_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4848_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_411_to_uint16 = const()[name = string("select_411_to_uint16"), val = uint16(2)]; tensor var_4848_shape_cast_fp16_to_uint16 = cast(dtype = var_4848_shape_cast_fp16_to_uint16_dtype_0, x = var_4848_shape_cast_fp16)[name = string("cast_560")]; uint16 gather_411_cast_uint16 = gather(axis = gather_411_axis_0, batch_dims = gather_411_batch_dims_0, indices = select_411_to_uint16, validate_indices = gather_411_validate_indices_0, x = var_4848_shape_cast_fp16_to_uint16)[name = string("gather_411_cast_uint16")]; string gather_411_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_411_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_412 = const()[name = string("gather_412"), val = int32(128)]; tensor var_4855_axes_0 = const()[name = string("op_4855_axes_0"), val = tensor([2])]; tensor var_4855_cast_fp16 = expand_dims(axes = var_4855_axes_0, x = var_4841_cast_fp16)[name = string("op_4855_cast_fp16")]; int32 concat_432_axis_0 = const()[name = string("concat_432_axis_0"), val = int32(0)]; bool concat_432_interleave_0 = const()[name = string("concat_432_interleave_0"), val = bool(false)]; int32 gather_411_cast_uint16_to_int32 = cast(dtype = gather_411_cast_uint16_to_int32_dtype_0, x = gather_411_cast_uint16)[name = string("cast_559")]; tensor concat_432 = concat(axis = concat_432_axis_0, interleave = concat_432_interleave_0, values = (gather_409, gather_410, var_78, gather_411_cast_uint16_to_int32, gather_412))[name = string("concat_432")]; tensor shape_457_cast_fp16 = shape(x = var_4855_cast_fp16)[name = string("shape_457_cast_fp16")]; tensor real_div_44 = real_div(x = concat_432, y = shape_457_cast_fp16)[name = string("real_div_44")]; tensor hidden_states_1039_cast_fp16 = tile(reps = real_div_44, x = var_4855_cast_fp16)[name = string("hidden_states_1039_cast_fp16")]; tensor concat_433x = const()[name = string("concat_433x"), val = tensor([1, 16, -1, 128])]; tensor key_89_cast_fp16 = reshape(shape = concat_433x, x = hidden_states_1039_cast_fp16)[name = string("key_89_cast_fp16")]; tensor var_4865_shape_cast_fp16 = shape(x = var_4846_cast_fp16)[name = string("op_4865_shape_cast_fp16")]; int32 gather_413 = const()[name = string("gather_413"), val = int32(1)]; int32 gather_414 = const()[name = string("gather_414"), val = int32(8)]; int32 gather_415_axis_0 = const()[name = string("gather_415_axis_0"), val = int32(0)]; int32 gather_415_batch_dims_0 = const()[name = string("gather_415_batch_dims_0"), val = int32(0)]; bool gather_415_validate_indices_0 = const()[name = string("gather_415_validate_indices_0"), val = bool(false)]; string var_4865_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4865_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_415_to_uint16 = const()[name = string("select_415_to_uint16"), val = uint16(2)]; tensor var_4865_shape_cast_fp16_to_uint16 = cast(dtype = var_4865_shape_cast_fp16_to_uint16_dtype_0, x = var_4865_shape_cast_fp16)[name = string("cast_558")]; uint16 gather_415_cast_uint16 = gather(axis = gather_415_axis_0, batch_dims = gather_415_batch_dims_0, indices = select_415_to_uint16, validate_indices = gather_415_validate_indices_0, x = var_4865_shape_cast_fp16_to_uint16)[name = string("gather_415_cast_uint16")]; string gather_415_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_415_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_416 = const()[name = string("gather_416"), val = int32(128)]; tensor var_4872_axes_0 = const()[name = string("op_4872_axes_0"), val = tensor([2])]; tensor var_4872_cast_fp16 = expand_dims(axes = var_4872_axes_0, x = var_4846_cast_fp16)[name = string("op_4872_cast_fp16")]; int32 concat_434_axis_0 = const()[name = string("concat_434_axis_0"), val = int32(0)]; bool concat_434_interleave_0 = const()[name = string("concat_434_interleave_0"), val = bool(false)]; int32 gather_415_cast_uint16_to_int32 = cast(dtype = gather_415_cast_uint16_to_int32_dtype_0, x = gather_415_cast_uint16)[name = string("cast_557")]; tensor concat_434 = concat(axis = concat_434_axis_0, interleave = concat_434_interleave_0, values = (gather_413, gather_414, var_78, gather_415_cast_uint16_to_int32, gather_416))[name = string("concat_434")]; tensor shape_462_cast_fp16 = shape(x = var_4872_cast_fp16)[name = string("shape_462_cast_fp16")]; tensor real_div_45 = real_div(x = concat_434, y = shape_462_cast_fp16)[name = string("real_div_45")]; tensor hidden_states_1043_cast_fp16 = tile(reps = real_div_45, x = var_4872_cast_fp16)[name = string("hidden_states_1043_cast_fp16")]; tensor concat_435x = const()[name = string("concat_435x"), val = tensor([1, 16, -1, 128])]; tensor value_89_cast_fp16 = reshape(shape = concat_435x, x = hidden_states_1043_cast_fp16)[name = string("value_89_cast_fp16")]; tensor var_4882_shape_cast_fp16 = shape(x = key_89_cast_fp16)[name = string("op_4882_shape_cast_fp16")]; int32 gather_417_axis_0 = const()[name = string("gather_417_axis_0"), val = int32(0)]; int32 gather_417_batch_dims_0 = const()[name = string("gather_417_batch_dims_0"), val = int32(0)]; bool gather_417_validate_indices_0 = const()[name = string("gather_417_validate_indices_0"), val = bool(false)]; string var_4882_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4882_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_417_to_uint16 = const()[name = string("select_417_to_uint16"), val = uint16(2)]; tensor var_4882_shape_cast_fp16_to_uint16 = cast(dtype = var_4882_shape_cast_fp16_to_uint16_dtype_0, x = var_4882_shape_cast_fp16)[name = string("cast_556")]; uint16 gather_417_cast_uint16 = gather(axis = gather_417_axis_0, batch_dims = gather_417_batch_dims_0, indices = select_417_to_uint16, validate_indices = gather_417_validate_indices_0, x = var_4882_shape_cast_fp16_to_uint16)[name = string("gather_417_cast_uint16")]; string gather_417_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_417_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_436_values0_0 = const()[name = string("concat_436_values0_0"), val = int32(1)]; int32 concat_436_values1_0 = const()[name = string("concat_436_values1_0"), val = int32(1)]; int32 concat_436_values2_0 = const()[name = string("concat_436_values2_0"), val = int32(0)]; int32 concat_436_axis_0 = const()[name = string("concat_436_axis_0"), val = int32(0)]; bool concat_436_interleave_0 = const()[name = string("concat_436_interleave_0"), val = bool(false)]; int32 gather_417_cast_uint16_to_int32 = cast(dtype = gather_417_cast_uint16_to_int32_dtype_0, x = gather_417_cast_uint16)[name = string("cast_555")]; tensor concat_436 = concat(axis = concat_436_axis_0, interleave = concat_436_interleave_0, values = (concat_436_values0_0, concat_436_values1_0, concat_436_values2_0, gather_417_cast_uint16_to_int32))[name = string("concat_436")]; tensor attention_mask_45_begin_0 = const()[name = string("attention_mask_45_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_45_end_mask_0 = const()[name = string("attention_mask_45_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_45_cast_fp16 = slice_by_index(begin = attention_mask_45_begin_0, end = concat_436, end_mask = attention_mask_45_end_mask_0, x = causal_mask)[name = string("attention_mask_45_cast_fp16")]; tensor mul_22_cast_fp16 = mul(x = query_89_cast_fp16, y = var_85_to_fp16)[name = string("mul_22_cast_fp16")]; bool matmul_22_transpose_y_0 = const()[name = string("matmul_22_transpose_y_0"), val = bool(true)]; bool matmul_22_transpose_x_0 = const()[name = string("matmul_22_transpose_x_0"), val = bool(false)]; tensor matmul_22_cast_fp16 = matmul(transpose_x = matmul_22_transpose_x_0, transpose_y = matmul_22_transpose_y_0, x = mul_22_cast_fp16, y = key_89_cast_fp16)[name = string("matmul_22_cast_fp16")]; tensor add_440_cast_fp16 = add(x = matmul_22_cast_fp16, y = attention_mask_45_cast_fp16)[name = string("add_440_cast_fp16")]; int32 softmax_22_axis_0 = const()[name = string("softmax_22_axis_0"), val = int32(-1)]; tensor softmax_22_cast_fp16 = softmax(axis = softmax_22_axis_0, x = add_440_cast_fp16)[name = string("softmax_22_cast_fp16")]; bool attn_output_89_transpose_x_0 = const()[name = string("attn_output_89_transpose_x_0"), val = bool(false)]; bool attn_output_89_transpose_y_0 = const()[name = string("attn_output_89_transpose_y_0"), val = bool(false)]; tensor attn_output_89_cast_fp16 = matmul(transpose_x = attn_output_89_transpose_x_0, transpose_y = attn_output_89_transpose_y_0, x = softmax_22_cast_fp16, y = value_89_cast_fp16)[name = string("attn_output_89_cast_fp16")]; tensor var_4891_perm_0 = const()[name = string("op_4891_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_437_axis_0 = const()[name = string("concat_437_axis_0"), val = int32(0)]; bool concat_437_interleave_0 = const()[name = string("concat_437_interleave_0"), val = bool(false)]; int32 gather_401_cast_uint16_to_int32 = cast(dtype = gather_401_cast_uint16_to_int32_dtype_0, x = gather_401_cast_uint16)[name = string("cast_561")]; tensor concat_437 = concat(axis = concat_437_axis_0, interleave = concat_437_interleave_0, values = (gather_400, gather_401_cast_uint16_to_int32, var_72))[name = string("concat_437")]; tensor var_4891_cast_fp16 = transpose(perm = var_4891_perm_0, x = attn_output_89_cast_fp16)[name = string("transpose_20")]; tensor var_4894_cast_fp16 = reshape(shape = concat_437, x = var_4891_cast_fp16)[name = string("op_4894_cast_fp16")]; tensor model_model_layers_22_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_22_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1011746304)))]; tensor linear_157_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_22_self_attn_o_proj_weight_to_fp16, x = var_4894_cast_fp16)[name = string("linear_157_cast_fp16")]; tensor hidden_states_1047_cast_fp16 = add(x = hidden_states_1011_cast_fp16, y = linear_157_cast_fp16)[name = string("hidden_states_1047_cast_fp16")]; fp16 var_78_promoted_91_to_fp16 = const()[name = string("op_78_promoted_91_to_fp16"), val = fp16(0x1p+1)]; tensor var_4901_cast_fp16 = pow(x = hidden_states_1047_cast_fp16, y = var_78_promoted_91_to_fp16)[name = string("op_4901_cast_fp16")]; tensor variance_183_axes_0 = const()[name = string("variance_183_axes_0"), val = tensor([-1])]; bool variance_183_keep_dims_0 = const()[name = string("variance_183_keep_dims_0"), val = bool(true)]; tensor variance_183_cast_fp16 = reduce_mean(axes = variance_183_axes_0, keep_dims = variance_183_keep_dims_0, x = var_4901_cast_fp16)[name = string("variance_183_cast_fp16")]; fp16 var_4904_to_fp16 = const()[name = string("op_4904_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4905_cast_fp16 = add(x = variance_183_cast_fp16, y = var_4904_to_fp16)[name = string("op_4905_cast_fp16")]; fp32 var_4906_epsilon_0 = const()[name = string("op_4906_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4906_cast_fp16 = rsqrt(epsilon = var_4906_epsilon_0, x = var_4905_cast_fp16)[name = string("op_4906_cast_fp16")]; tensor hidden_states_1051_cast_fp16 = mul(x = hidden_states_1047_cast_fp16, y = var_4906_cast_fp16)[name = string("hidden_states_1051_cast_fp16")]; tensor model_model_layers_22_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_22_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1015940672)))]; tensor input_179_cast_fp16 = mul(x = model_model_layers_22_post_attention_layernorm_weight_to_fp16, y = hidden_states_1051_cast_fp16)[name = string("input_179_cast_fp16")]; tensor model_model_layers_22_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_22_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1015942784)))]; tensor linear_158_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_22_mlp_gate_proj_weight_to_fp16, x = input_179_cast_fp16)[name = string("linear_158_cast_fp16")]; tensor var_4918_cast_fp16 = silu(x = linear_158_cast_fp16)[name = string("op_4918_cast_fp16")]; tensor model_model_layers_22_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_22_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1022234304)))]; tensor linear_159_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_22_mlp_up_proj_weight_to_fp16, x = input_179_cast_fp16)[name = string("linear_159_cast_fp16")]; tensor input_183_cast_fp16 = mul(x = var_4918_cast_fp16, y = linear_159_cast_fp16)[name = string("input_183_cast_fp16")]; tensor model_model_layers_22_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_22_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1028525824)))]; tensor linear_160_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_22_mlp_down_proj_weight_to_fp16, x = input_183_cast_fp16)[name = string("linear_160_cast_fp16")]; tensor hidden_states_1057_cast_fp16 = add(x = hidden_states_1047_cast_fp16, y = linear_160_cast_fp16)[name = string("hidden_states_1057_cast_fp16")]; fp16 var_78_promoted_92_to_fp16 = const()[name = string("op_78_promoted_92_to_fp16"), val = fp16(0x1p+1)]; tensor var_4931_cast_fp16 = pow(x = hidden_states_1057_cast_fp16, y = var_78_promoted_92_to_fp16)[name = string("op_4931_cast_fp16")]; tensor variance_185_axes_0 = const()[name = string("variance_185_axes_0"), val = tensor([-1])]; bool variance_185_keep_dims_0 = const()[name = string("variance_185_keep_dims_0"), val = bool(true)]; tensor variance_185_cast_fp16 = reduce_mean(axes = variance_185_axes_0, keep_dims = variance_185_keep_dims_0, x = var_4931_cast_fp16)[name = string("variance_185_cast_fp16")]; fp16 var_4934_to_fp16 = const()[name = string("op_4934_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4935_cast_fp16 = add(x = variance_185_cast_fp16, y = var_4934_to_fp16)[name = string("op_4935_cast_fp16")]; fp32 var_4936_epsilon_0 = const()[name = string("op_4936_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4936_cast_fp16 = rsqrt(epsilon = var_4936_epsilon_0, x = var_4935_cast_fp16)[name = string("op_4936_cast_fp16")]; tensor hidden_states_1061_cast_fp16 = mul(x = hidden_states_1057_cast_fp16, y = var_4936_cast_fp16)[name = string("hidden_states_1061_cast_fp16")]; tensor model_model_layers_23_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1034817344)))]; tensor hidden_states_1065_cast_fp16 = mul(x = model_model_layers_23_input_layernorm_weight_to_fp16, y = hidden_states_1061_cast_fp16)[name = string("hidden_states_1065_cast_fp16")]; tensor var_4949_shape_cast_fp16 = shape(x = hidden_states_1065_cast_fp16)[name = string("op_4949_shape_cast_fp16")]; int32 gather_418 = const()[name = string("gather_418"), val = int32(1)]; int32 gather_419_axis_0 = const()[name = string("gather_419_axis_0"), val = int32(0)]; int32 gather_419_batch_dims_0 = const()[name = string("gather_419_batch_dims_0"), val = int32(0)]; bool gather_419_validate_indices_0 = const()[name = string("gather_419_validate_indices_0"), val = bool(false)]; string var_4949_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_4949_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_419_to_uint16 = const()[name = string("select_419_to_uint16"), val = uint16(1)]; tensor var_4949_shape_cast_fp16_to_uint16 = cast(dtype = var_4949_shape_cast_fp16_to_uint16_dtype_0, x = var_4949_shape_cast_fp16)[name = string("cast_554")]; uint16 gather_419_cast_uint16 = gather(axis = gather_419_axis_0, batch_dims = gather_419_batch_dims_0, indices = select_419_to_uint16, validate_indices = gather_419_validate_indices_0, x = var_4949_shape_cast_fp16_to_uint16)[name = string("gather_419_cast_uint16")]; string gather_419_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_419_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_23_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_23_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1034819456)))]; tensor linear_161_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_23_self_attn_q_proj_weight_to_fp16, x = hidden_states_1065_cast_fp16)[name = string("linear_161_cast_fp16")]; tensor concat_438x = const()[name = string("concat_438x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_1067_cast_fp16 = reshape(shape = concat_438x, x = linear_161_cast_fp16)[name = string("hidden_states_1067_cast_fp16")]; fp16 var_78_promoted_93_to_fp16 = const()[name = string("op_78_promoted_93_to_fp16"), val = fp16(0x1p+1)]; tensor var_4957_cast_fp16 = pow(x = hidden_states_1067_cast_fp16, y = var_78_promoted_93_to_fp16)[name = string("op_4957_cast_fp16")]; tensor variance_187_axes_0 = const()[name = string("variance_187_axes_0"), val = tensor([-1])]; bool variance_187_keep_dims_0 = const()[name = string("variance_187_keep_dims_0"), val = bool(true)]; tensor variance_187_cast_fp16 = reduce_mean(axes = variance_187_axes_0, keep_dims = variance_187_keep_dims_0, x = var_4957_cast_fp16)[name = string("variance_187_cast_fp16")]; fp16 var_4960_to_fp16 = const()[name = string("op_4960_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4961_cast_fp16 = add(x = variance_187_cast_fp16, y = var_4960_to_fp16)[name = string("op_4961_cast_fp16")]; fp32 var_4962_epsilon_0 = const()[name = string("op_4962_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4962_cast_fp16 = rsqrt(epsilon = var_4962_epsilon_0, x = var_4961_cast_fp16)[name = string("op_4962_cast_fp16")]; tensor hidden_states_1071_cast_fp16 = mul(x = hidden_states_1067_cast_fp16, y = var_4962_cast_fp16)[name = string("hidden_states_1071_cast_fp16")]; tensor model_model_layers_23_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_23_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1039013824)))]; tensor var_4965_cast_fp16 = mul(x = model_model_layers_23_self_attn_q_norm_weight_to_fp16, y = hidden_states_1071_cast_fp16)[name = string("op_4965_cast_fp16")]; tensor q_47_perm_0 = const()[name = string("q_47_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_23_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_23_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1039014144)))]; tensor linear_162_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_23_self_attn_k_proj_weight_to_fp16, x = hidden_states_1065_cast_fp16)[name = string("linear_162_cast_fp16")]; tensor concat_439x = const()[name = string("concat_439x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_1075_cast_fp16 = reshape(shape = concat_439x, x = linear_162_cast_fp16)[name = string("hidden_states_1075_cast_fp16")]; fp16 var_78_promoted_94_to_fp16 = const()[name = string("op_78_promoted_94_to_fp16"), val = fp16(0x1p+1)]; tensor var_4973_cast_fp16 = pow(x = hidden_states_1075_cast_fp16, y = var_78_promoted_94_to_fp16)[name = string("op_4973_cast_fp16")]; tensor variance_189_axes_0 = const()[name = string("variance_189_axes_0"), val = tensor([-1])]; bool variance_189_keep_dims_0 = const()[name = string("variance_189_keep_dims_0"), val = bool(true)]; tensor variance_189_cast_fp16 = reduce_mean(axes = variance_189_axes_0, keep_dims = variance_189_keep_dims_0, x = var_4973_cast_fp16)[name = string("variance_189_cast_fp16")]; fp16 var_4976_to_fp16 = const()[name = string("op_4976_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_4977_cast_fp16 = add(x = variance_189_cast_fp16, y = var_4976_to_fp16)[name = string("op_4977_cast_fp16")]; fp32 var_4978_epsilon_0 = const()[name = string("op_4978_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_4978_cast_fp16 = rsqrt(epsilon = var_4978_epsilon_0, x = var_4977_cast_fp16)[name = string("op_4978_cast_fp16")]; tensor hidden_states_1079_cast_fp16 = mul(x = hidden_states_1075_cast_fp16, y = var_4978_cast_fp16)[name = string("hidden_states_1079_cast_fp16")]; tensor model_model_layers_23_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_23_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1041111360)))]; tensor var_4981_cast_fp16 = mul(x = model_model_layers_23_self_attn_k_norm_weight_to_fp16, y = hidden_states_1079_cast_fp16)[name = string("op_4981_cast_fp16")]; tensor k_47_perm_0 = const()[name = string("k_47_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_23_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_23_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1041111680)))]; tensor linear_163_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_23_self_attn_v_proj_weight_to_fp16, x = hidden_states_1065_cast_fp16)[name = string("linear_163_cast_fp16")]; tensor concat_440x = const()[name = string("concat_440x"), val = tensor([1, -1, 8, 128])]; tensor var_4986_cast_fp16 = reshape(shape = concat_440x, x = linear_163_cast_fp16)[name = string("op_4986_cast_fp16")]; tensor v_state_47_perm_0 = const()[name = string("v_state_47_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_47_cast_fp16 = transpose(perm = q_47_perm_0, x = var_4965_cast_fp16)[name = string("transpose_19")]; tensor var_4990_cast_fp16 = mul(x = q_47_cast_fp16, y = cos_5_cast_fp16)[name = string("op_4990_cast_fp16")]; tensor x1_93_begin_0 = const()[name = string("x1_93_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_93_end_0 = const()[name = string("x1_93_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_93_end_mask_0 = const()[name = string("x1_93_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_93_cast_fp16 = slice_by_index(begin = x1_93_begin_0, end = x1_93_end_0, end_mask = x1_93_end_mask_0, x = q_47_cast_fp16)[name = string("x1_93_cast_fp16")]; tensor x2_93_begin_0 = const()[name = string("x2_93_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_93_end_0 = const()[name = string("x2_93_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_93_end_mask_0 = const()[name = string("x2_93_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_93_cast_fp16 = slice_by_index(begin = x2_93_begin_0, end = x2_93_end_0, end_mask = x2_93_end_mask_0, x = q_47_cast_fp16)[name = string("x2_93_cast_fp16")]; fp16 const_50_promoted_to_fp16 = const()[name = string("const_50_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5001_cast_fp16 = mul(x = x2_93_cast_fp16, y = const_50_promoted_to_fp16)[name = string("op_5001_cast_fp16")]; bool var_5003_interleave_0 = const()[name = string("op_5003_interleave_0"), val = bool(false)]; tensor var_5003_cast_fp16 = concat(axis = var_72, interleave = var_5003_interleave_0, values = (var_5001_cast_fp16, x1_93_cast_fp16))[name = string("op_5003_cast_fp16")]; tensor var_5004_cast_fp16 = mul(x = var_5003_cast_fp16, y = sin_5_cast_fp16)[name = string("op_5004_cast_fp16")]; tensor query_93_cast_fp16 = add(x = var_4990_cast_fp16, y = var_5004_cast_fp16)[name = string("query_93_cast_fp16")]; tensor k_47_cast_fp16 = transpose(perm = k_47_perm_0, x = var_4981_cast_fp16)[name = string("transpose_18")]; tensor var_5006_cast_fp16 = mul(x = k_47_cast_fp16, y = cos_5_cast_fp16)[name = string("op_5006_cast_fp16")]; tensor x1_95_begin_0 = const()[name = string("x1_95_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_95_end_0 = const()[name = string("x1_95_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_95_end_mask_0 = const()[name = string("x1_95_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_95_cast_fp16 = slice_by_index(begin = x1_95_begin_0, end = x1_95_end_0, end_mask = x1_95_end_mask_0, x = k_47_cast_fp16)[name = string("x1_95_cast_fp16")]; tensor x2_95_begin_0 = const()[name = string("x2_95_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_95_end_0 = const()[name = string("x2_95_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_95_end_mask_0 = const()[name = string("x2_95_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_95_cast_fp16 = slice_by_index(begin = x2_95_begin_0, end = x2_95_end_0, end_mask = x2_95_end_mask_0, x = k_47_cast_fp16)[name = string("x2_95_cast_fp16")]; fp16 const_51_promoted_to_fp16 = const()[name = string("const_51_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5017_cast_fp16 = mul(x = x2_95_cast_fp16, y = const_51_promoted_to_fp16)[name = string("op_5017_cast_fp16")]; bool var_5019_interleave_0 = const()[name = string("op_5019_interleave_0"), val = bool(false)]; tensor var_5019_cast_fp16 = concat(axis = var_72, interleave = var_5019_interleave_0, values = (var_5017_cast_fp16, x1_95_cast_fp16))[name = string("op_5019_cast_fp16")]; tensor var_5020_cast_fp16 = mul(x = var_5019_cast_fp16, y = sin_5_cast_fp16)[name = string("op_5020_cast_fp16")]; tensor k_state_47_cast_fp16 = add(x = var_5006_cast_fp16, y = var_5020_cast_fp16)[name = string("k_state_47_cast_fp16")]; tensor expand_dims_276 = const()[name = string("expand_dims_276"), val = tensor([0])]; tensor expand_dims_277 = const()[name = string("expand_dims_277"), val = tensor([0])]; tensor expand_dims_279 = const()[name = string("expand_dims_279"), val = tensor([0])]; tensor concat_443_values0_0 = const()[name = string("concat_443_values0_0"), val = tensor([23])]; int32 concat_443_axis_0 = const()[name = string("concat_443_axis_0"), val = int32(0)]; bool concat_443_interleave_0 = const()[name = string("concat_443_interleave_0"), val = bool(false)]; tensor concat_443 = concat(axis = concat_443_axis_0, interleave = concat_443_interleave_0, values = (concat_443_values0_0, expand_dims_276, expand_dims_277, expand_dims_2, expand_dims_279))[name = string("concat_443")]; tensor key_cache_internal_tensor_assign_24_stride_0 = const()[name = string("key_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_443, begin_mask = key_cache_internal_tensor_assign_24_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_24_squeeze_mask_0, stride = key_cache_internal_tensor_assign_24_stride_0, update = k_state_47_cast_fp16, x = coreml_update_state_100)[name = string("key_cache_internal_tensor_assign_24_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_24_cast_fp16, input = key_cache)[name = string("coreml_update_state_102_write_state")]; tensor coreml_update_state_102 = read_state(input = key_cache)[name = string("coreml_update_state_102")]; tensor value_cache_internal_tensor_assign_24_stride_0 = const()[name = string("value_cache_internal_tensor_assign_24_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_24_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_24_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_24_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_24_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_47_cast_fp16 = transpose(perm = v_state_47_perm_0, x = var_4986_cast_fp16)[name = string("transpose_17")]; tensor value_cache_internal_tensor_assign_24_cast_fp16 = slice_update(begin = concat_443, begin_mask = value_cache_internal_tensor_assign_24_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_24_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_24_squeeze_mask_0, stride = value_cache_internal_tensor_assign_24_stride_0, update = v_state_47_cast_fp16, x = coreml_update_state_101)[name = string("value_cache_internal_tensor_assign_24_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_24_cast_fp16, input = value_cache)[name = string("coreml_update_state_103_write_state")]; tensor coreml_update_state_103 = read_state(input = value_cache)[name = string("coreml_update_state_103")]; tensor var_5043_begin_0 = const()[name = string("op_5043_begin_0"), val = tensor([23, 0, 0, 0, 0])]; tensor var_5043_end_0 = const()[name = string("op_5043_end_0"), val = tensor([24, 1, 8, 2048, 128])]; tensor var_5043_end_mask_0 = const()[name = string("op_5043_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5043_squeeze_mask_0 = const()[name = string("op_5043_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_5043_cast_fp16 = slice_by_index(begin = var_5043_begin_0, end = var_5043_end_0, end_mask = var_5043_end_mask_0, squeeze_mask = var_5043_squeeze_mask_0, x = coreml_update_state_102)[name = string("op_5043_cast_fp16")]; tensor var_5046_begin_0 = const()[name = string("op_5046_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5046_end_mask_0 = const()[name = string("op_5046_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5046_cast_fp16 = slice_by_index(begin = var_5046_begin_0, end = concat_12, end_mask = var_5046_end_mask_0, x = var_5043_cast_fp16)[name = string("op_5046_cast_fp16")]; tensor var_5048_begin_0 = const()[name = string("op_5048_begin_0"), val = tensor([23, 0, 0, 0, 0])]; tensor var_5048_end_0 = const()[name = string("op_5048_end_0"), val = tensor([24, 1, 8, 2048, 128])]; tensor var_5048_end_mask_0 = const()[name = string("op_5048_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5048_squeeze_mask_0 = const()[name = string("op_5048_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_5048_cast_fp16 = slice_by_index(begin = var_5048_begin_0, end = var_5048_end_0, end_mask = var_5048_end_mask_0, squeeze_mask = var_5048_squeeze_mask_0, x = coreml_update_state_103)[name = string("op_5048_cast_fp16")]; tensor var_5051_begin_0 = const()[name = string("op_5051_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5051_end_mask_0 = const()[name = string("op_5051_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5051_cast_fp16 = slice_by_index(begin = var_5051_begin_0, end = concat_12, end_mask = var_5051_end_mask_0, x = var_5048_cast_fp16)[name = string("op_5051_cast_fp16")]; tensor var_5053_shape_cast_fp16 = shape(x = var_5046_cast_fp16)[name = string("op_5053_shape_cast_fp16")]; int32 gather_427 = const()[name = string("gather_427"), val = int32(1)]; int32 gather_428 = const()[name = string("gather_428"), val = int32(8)]; int32 gather_429_axis_0 = const()[name = string("gather_429_axis_0"), val = int32(0)]; int32 gather_429_batch_dims_0 = const()[name = string("gather_429_batch_dims_0"), val = int32(0)]; bool gather_429_validate_indices_0 = const()[name = string("gather_429_validate_indices_0"), val = bool(false)]; string var_5053_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5053_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_429_to_uint16 = const()[name = string("select_429_to_uint16"), val = uint16(2)]; tensor var_5053_shape_cast_fp16_to_uint16 = cast(dtype = var_5053_shape_cast_fp16_to_uint16_dtype_0, x = var_5053_shape_cast_fp16)[name = string("cast_552")]; uint16 gather_429_cast_uint16 = gather(axis = gather_429_axis_0, batch_dims = gather_429_batch_dims_0, indices = select_429_to_uint16, validate_indices = gather_429_validate_indices_0, x = var_5053_shape_cast_fp16_to_uint16)[name = string("gather_429_cast_uint16")]; string gather_429_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_429_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_430 = const()[name = string("gather_430"), val = int32(128)]; tensor var_5060_axes_0 = const()[name = string("op_5060_axes_0"), val = tensor([2])]; tensor var_5060_cast_fp16 = expand_dims(axes = var_5060_axes_0, x = var_5046_cast_fp16)[name = string("op_5060_cast_fp16")]; int32 concat_451_axis_0 = const()[name = string("concat_451_axis_0"), val = int32(0)]; bool concat_451_interleave_0 = const()[name = string("concat_451_interleave_0"), val = bool(false)]; int32 gather_429_cast_uint16_to_int32 = cast(dtype = gather_429_cast_uint16_to_int32_dtype_0, x = gather_429_cast_uint16)[name = string("cast_551")]; tensor concat_451 = concat(axis = concat_451_axis_0, interleave = concat_451_interleave_0, values = (gather_427, gather_428, var_78, gather_429_cast_uint16_to_int32, gather_430))[name = string("concat_451")]; tensor shape_477_cast_fp16 = shape(x = var_5060_cast_fp16)[name = string("shape_477_cast_fp16")]; tensor real_div_46 = real_div(x = concat_451, y = shape_477_cast_fp16)[name = string("real_div_46")]; tensor hidden_states_1085_cast_fp16 = tile(reps = real_div_46, x = var_5060_cast_fp16)[name = string("hidden_states_1085_cast_fp16")]; tensor concat_452x = const()[name = string("concat_452x"), val = tensor([1, 16, -1, 128])]; tensor key_93_cast_fp16 = reshape(shape = concat_452x, x = hidden_states_1085_cast_fp16)[name = string("key_93_cast_fp16")]; tensor var_5070_shape_cast_fp16 = shape(x = var_5051_cast_fp16)[name = string("op_5070_shape_cast_fp16")]; int32 gather_431 = const()[name = string("gather_431"), val = int32(1)]; int32 gather_432 = const()[name = string("gather_432"), val = int32(8)]; int32 gather_433_axis_0 = const()[name = string("gather_433_axis_0"), val = int32(0)]; int32 gather_433_batch_dims_0 = const()[name = string("gather_433_batch_dims_0"), val = int32(0)]; bool gather_433_validate_indices_0 = const()[name = string("gather_433_validate_indices_0"), val = bool(false)]; string var_5070_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5070_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_433_to_uint16 = const()[name = string("select_433_to_uint16"), val = uint16(2)]; tensor var_5070_shape_cast_fp16_to_uint16 = cast(dtype = var_5070_shape_cast_fp16_to_uint16_dtype_0, x = var_5070_shape_cast_fp16)[name = string("cast_550")]; uint16 gather_433_cast_uint16 = gather(axis = gather_433_axis_0, batch_dims = gather_433_batch_dims_0, indices = select_433_to_uint16, validate_indices = gather_433_validate_indices_0, x = var_5070_shape_cast_fp16_to_uint16)[name = string("gather_433_cast_uint16")]; string gather_433_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_433_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_434 = const()[name = string("gather_434"), val = int32(128)]; tensor var_5077_axes_0 = const()[name = string("op_5077_axes_0"), val = tensor([2])]; tensor var_5077_cast_fp16 = expand_dims(axes = var_5077_axes_0, x = var_5051_cast_fp16)[name = string("op_5077_cast_fp16")]; int32 concat_453_axis_0 = const()[name = string("concat_453_axis_0"), val = int32(0)]; bool concat_453_interleave_0 = const()[name = string("concat_453_interleave_0"), val = bool(false)]; int32 gather_433_cast_uint16_to_int32 = cast(dtype = gather_433_cast_uint16_to_int32_dtype_0, x = gather_433_cast_uint16)[name = string("cast_549")]; tensor concat_453 = concat(axis = concat_453_axis_0, interleave = concat_453_interleave_0, values = (gather_431, gather_432, var_78, gather_433_cast_uint16_to_int32, gather_434))[name = string("concat_453")]; tensor shape_482_cast_fp16 = shape(x = var_5077_cast_fp16)[name = string("shape_482_cast_fp16")]; tensor real_div_47 = real_div(x = concat_453, y = shape_482_cast_fp16)[name = string("real_div_47")]; tensor hidden_states_1089_cast_fp16 = tile(reps = real_div_47, x = var_5077_cast_fp16)[name = string("hidden_states_1089_cast_fp16")]; tensor concat_454x = const()[name = string("concat_454x"), val = tensor([1, 16, -1, 128])]; tensor value_93_cast_fp16 = reshape(shape = concat_454x, x = hidden_states_1089_cast_fp16)[name = string("value_93_cast_fp16")]; tensor var_5087_shape_cast_fp16 = shape(x = key_93_cast_fp16)[name = string("op_5087_shape_cast_fp16")]; int32 gather_435_axis_0 = const()[name = string("gather_435_axis_0"), val = int32(0)]; int32 gather_435_batch_dims_0 = const()[name = string("gather_435_batch_dims_0"), val = int32(0)]; bool gather_435_validate_indices_0 = const()[name = string("gather_435_validate_indices_0"), val = bool(false)]; string var_5087_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5087_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_435_to_uint16 = const()[name = string("select_435_to_uint16"), val = uint16(2)]; tensor var_5087_shape_cast_fp16_to_uint16 = cast(dtype = var_5087_shape_cast_fp16_to_uint16_dtype_0, x = var_5087_shape_cast_fp16)[name = string("cast_548")]; uint16 gather_435_cast_uint16 = gather(axis = gather_435_axis_0, batch_dims = gather_435_batch_dims_0, indices = select_435_to_uint16, validate_indices = gather_435_validate_indices_0, x = var_5087_shape_cast_fp16_to_uint16)[name = string("gather_435_cast_uint16")]; string gather_435_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_435_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_455_values0_0 = const()[name = string("concat_455_values0_0"), val = int32(1)]; int32 concat_455_values1_0 = const()[name = string("concat_455_values1_0"), val = int32(1)]; int32 concat_455_values2_0 = const()[name = string("concat_455_values2_0"), val = int32(0)]; int32 concat_455_axis_0 = const()[name = string("concat_455_axis_0"), val = int32(0)]; bool concat_455_interleave_0 = const()[name = string("concat_455_interleave_0"), val = bool(false)]; int32 gather_435_cast_uint16_to_int32 = cast(dtype = gather_435_cast_uint16_to_int32_dtype_0, x = gather_435_cast_uint16)[name = string("cast_547")]; tensor concat_455 = concat(axis = concat_455_axis_0, interleave = concat_455_interleave_0, values = (concat_455_values0_0, concat_455_values1_0, concat_455_values2_0, gather_435_cast_uint16_to_int32))[name = string("concat_455")]; tensor attention_mask_47_begin_0 = const()[name = string("attention_mask_47_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_47_end_mask_0 = const()[name = string("attention_mask_47_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_47_cast_fp16 = slice_by_index(begin = attention_mask_47_begin_0, end = concat_455, end_mask = attention_mask_47_end_mask_0, x = causal_mask)[name = string("attention_mask_47_cast_fp16")]; tensor mul_23_cast_fp16 = mul(x = query_93_cast_fp16, y = var_85_to_fp16)[name = string("mul_23_cast_fp16")]; bool matmul_23_transpose_y_0 = const()[name = string("matmul_23_transpose_y_0"), val = bool(true)]; bool matmul_23_transpose_x_0 = const()[name = string("matmul_23_transpose_x_0"), val = bool(false)]; tensor matmul_23_cast_fp16 = matmul(transpose_x = matmul_23_transpose_x_0, transpose_y = matmul_23_transpose_y_0, x = mul_23_cast_fp16, y = key_93_cast_fp16)[name = string("matmul_23_cast_fp16")]; tensor add_459_cast_fp16 = add(x = matmul_23_cast_fp16, y = attention_mask_47_cast_fp16)[name = string("add_459_cast_fp16")]; int32 softmax_23_axis_0 = const()[name = string("softmax_23_axis_0"), val = int32(-1)]; tensor softmax_23_cast_fp16 = softmax(axis = softmax_23_axis_0, x = add_459_cast_fp16)[name = string("softmax_23_cast_fp16")]; bool attn_output_93_transpose_x_0 = const()[name = string("attn_output_93_transpose_x_0"), val = bool(false)]; bool attn_output_93_transpose_y_0 = const()[name = string("attn_output_93_transpose_y_0"), val = bool(false)]; tensor attn_output_93_cast_fp16 = matmul(transpose_x = attn_output_93_transpose_x_0, transpose_y = attn_output_93_transpose_y_0, x = softmax_23_cast_fp16, y = value_93_cast_fp16)[name = string("attn_output_93_cast_fp16")]; tensor var_5096_perm_0 = const()[name = string("op_5096_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_456_axis_0 = const()[name = string("concat_456_axis_0"), val = int32(0)]; bool concat_456_interleave_0 = const()[name = string("concat_456_interleave_0"), val = bool(false)]; int32 gather_419_cast_uint16_to_int32 = cast(dtype = gather_419_cast_uint16_to_int32_dtype_0, x = gather_419_cast_uint16)[name = string("cast_553")]; tensor concat_456 = concat(axis = concat_456_axis_0, interleave = concat_456_interleave_0, values = (gather_418, gather_419_cast_uint16_to_int32, var_72))[name = string("concat_456")]; tensor var_5096_cast_fp16 = transpose(perm = var_5096_perm_0, x = attn_output_93_cast_fp16)[name = string("transpose_16")]; tensor var_5099_cast_fp16 = reshape(shape = concat_456, x = var_5096_cast_fp16)[name = string("op_5099_cast_fp16")]; tensor model_model_layers_23_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_23_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1043208896)))]; tensor linear_164_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_23_self_attn_o_proj_weight_to_fp16, x = var_5099_cast_fp16)[name = string("linear_164_cast_fp16")]; tensor hidden_states_1093_cast_fp16 = add(x = hidden_states_1057_cast_fp16, y = linear_164_cast_fp16)[name = string("hidden_states_1093_cast_fp16")]; fp16 var_78_promoted_95_to_fp16 = const()[name = string("op_78_promoted_95_to_fp16"), val = fp16(0x1p+1)]; tensor var_5106_cast_fp16 = pow(x = hidden_states_1093_cast_fp16, y = var_78_promoted_95_to_fp16)[name = string("op_5106_cast_fp16")]; tensor variance_191_axes_0 = const()[name = string("variance_191_axes_0"), val = tensor([-1])]; bool variance_191_keep_dims_0 = const()[name = string("variance_191_keep_dims_0"), val = bool(true)]; tensor variance_191_cast_fp16 = reduce_mean(axes = variance_191_axes_0, keep_dims = variance_191_keep_dims_0, x = var_5106_cast_fp16)[name = string("variance_191_cast_fp16")]; fp16 var_5109_to_fp16 = const()[name = string("op_5109_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5110_cast_fp16 = add(x = variance_191_cast_fp16, y = var_5109_to_fp16)[name = string("op_5110_cast_fp16")]; fp32 var_5111_epsilon_0 = const()[name = string("op_5111_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5111_cast_fp16 = rsqrt(epsilon = var_5111_epsilon_0, x = var_5110_cast_fp16)[name = string("op_5111_cast_fp16")]; tensor hidden_states_1097_cast_fp16 = mul(x = hidden_states_1093_cast_fp16, y = var_5111_cast_fp16)[name = string("hidden_states_1097_cast_fp16")]; tensor model_model_layers_23_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_23_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1047403264)))]; tensor input_187_cast_fp16 = mul(x = model_model_layers_23_post_attention_layernorm_weight_to_fp16, y = hidden_states_1097_cast_fp16)[name = string("input_187_cast_fp16")]; tensor model_model_layers_23_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_23_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1047405376)))]; tensor linear_165_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_23_mlp_gate_proj_weight_to_fp16, x = input_187_cast_fp16)[name = string("linear_165_cast_fp16")]; tensor var_5123_cast_fp16 = silu(x = linear_165_cast_fp16)[name = string("op_5123_cast_fp16")]; tensor model_model_layers_23_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_23_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1053696896)))]; tensor linear_166_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_23_mlp_up_proj_weight_to_fp16, x = input_187_cast_fp16)[name = string("linear_166_cast_fp16")]; tensor input_191_cast_fp16 = mul(x = var_5123_cast_fp16, y = linear_166_cast_fp16)[name = string("input_191_cast_fp16")]; tensor model_model_layers_23_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_23_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1059988416)))]; tensor linear_167_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_23_mlp_down_proj_weight_to_fp16, x = input_191_cast_fp16)[name = string("linear_167_cast_fp16")]; tensor hidden_states_1103_cast_fp16 = add(x = hidden_states_1093_cast_fp16, y = linear_167_cast_fp16)[name = string("hidden_states_1103_cast_fp16")]; fp16 var_78_promoted_96_to_fp16 = const()[name = string("op_78_promoted_96_to_fp16"), val = fp16(0x1p+1)]; tensor var_5136_cast_fp16 = pow(x = hidden_states_1103_cast_fp16, y = var_78_promoted_96_to_fp16)[name = string("op_5136_cast_fp16")]; tensor variance_193_axes_0 = const()[name = string("variance_193_axes_0"), val = tensor([-1])]; bool variance_193_keep_dims_0 = const()[name = string("variance_193_keep_dims_0"), val = bool(true)]; tensor variance_193_cast_fp16 = reduce_mean(axes = variance_193_axes_0, keep_dims = variance_193_keep_dims_0, x = var_5136_cast_fp16)[name = string("variance_193_cast_fp16")]; fp16 var_5139_to_fp16 = const()[name = string("op_5139_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5140_cast_fp16 = add(x = variance_193_cast_fp16, y = var_5139_to_fp16)[name = string("op_5140_cast_fp16")]; fp32 var_5141_epsilon_0 = const()[name = string("op_5141_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5141_cast_fp16 = rsqrt(epsilon = var_5141_epsilon_0, x = var_5140_cast_fp16)[name = string("op_5141_cast_fp16")]; tensor hidden_states_1107_cast_fp16 = mul(x = hidden_states_1103_cast_fp16, y = var_5141_cast_fp16)[name = string("hidden_states_1107_cast_fp16")]; tensor model_model_layers_24_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_24_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1066279936)))]; tensor hidden_states_1111_cast_fp16 = mul(x = model_model_layers_24_input_layernorm_weight_to_fp16, y = hidden_states_1107_cast_fp16)[name = string("hidden_states_1111_cast_fp16")]; tensor var_5154_shape_cast_fp16 = shape(x = hidden_states_1111_cast_fp16)[name = string("op_5154_shape_cast_fp16")]; int32 gather_436 = const()[name = string("gather_436"), val = int32(1)]; int32 gather_437_axis_0 = const()[name = string("gather_437_axis_0"), val = int32(0)]; int32 gather_437_batch_dims_0 = const()[name = string("gather_437_batch_dims_0"), val = int32(0)]; bool gather_437_validate_indices_0 = const()[name = string("gather_437_validate_indices_0"), val = bool(false)]; string var_5154_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5154_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_437_to_uint16 = const()[name = string("select_437_to_uint16"), val = uint16(1)]; tensor var_5154_shape_cast_fp16_to_uint16 = cast(dtype = var_5154_shape_cast_fp16_to_uint16_dtype_0, x = var_5154_shape_cast_fp16)[name = string("cast_546")]; uint16 gather_437_cast_uint16 = gather(axis = gather_437_axis_0, batch_dims = gather_437_batch_dims_0, indices = select_437_to_uint16, validate_indices = gather_437_validate_indices_0, x = var_5154_shape_cast_fp16_to_uint16)[name = string("gather_437_cast_uint16")]; string gather_437_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_437_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_24_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_24_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1066282048)))]; tensor linear_168_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_24_self_attn_q_proj_weight_to_fp16, x = hidden_states_1111_cast_fp16)[name = string("linear_168_cast_fp16")]; tensor concat_457x = const()[name = string("concat_457x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_1113_cast_fp16 = reshape(shape = concat_457x, x = linear_168_cast_fp16)[name = string("hidden_states_1113_cast_fp16")]; fp16 var_78_promoted_97_to_fp16 = const()[name = string("op_78_promoted_97_to_fp16"), val = fp16(0x1p+1)]; tensor var_5162_cast_fp16 = pow(x = hidden_states_1113_cast_fp16, y = var_78_promoted_97_to_fp16)[name = string("op_5162_cast_fp16")]; tensor variance_195_axes_0 = const()[name = string("variance_195_axes_0"), val = tensor([-1])]; bool variance_195_keep_dims_0 = const()[name = string("variance_195_keep_dims_0"), val = bool(true)]; tensor variance_195_cast_fp16 = reduce_mean(axes = variance_195_axes_0, keep_dims = variance_195_keep_dims_0, x = var_5162_cast_fp16)[name = string("variance_195_cast_fp16")]; fp16 var_5165_to_fp16 = const()[name = string("op_5165_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5166_cast_fp16 = add(x = variance_195_cast_fp16, y = var_5165_to_fp16)[name = string("op_5166_cast_fp16")]; fp32 var_5167_epsilon_0 = const()[name = string("op_5167_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5167_cast_fp16 = rsqrt(epsilon = var_5167_epsilon_0, x = var_5166_cast_fp16)[name = string("op_5167_cast_fp16")]; tensor hidden_states_1117_cast_fp16 = mul(x = hidden_states_1113_cast_fp16, y = var_5167_cast_fp16)[name = string("hidden_states_1117_cast_fp16")]; tensor model_model_layers_24_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_24_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1070476416)))]; tensor var_5170_cast_fp16 = mul(x = model_model_layers_24_self_attn_q_norm_weight_to_fp16, y = hidden_states_1117_cast_fp16)[name = string("op_5170_cast_fp16")]; tensor q_49_perm_0 = const()[name = string("q_49_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_24_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_24_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1070476736)))]; tensor linear_169_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_24_self_attn_k_proj_weight_to_fp16, x = hidden_states_1111_cast_fp16)[name = string("linear_169_cast_fp16")]; tensor concat_458x = const()[name = string("concat_458x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_1121_cast_fp16 = reshape(shape = concat_458x, x = linear_169_cast_fp16)[name = string("hidden_states_1121_cast_fp16")]; fp16 var_78_promoted_98_to_fp16 = const()[name = string("op_78_promoted_98_to_fp16"), val = fp16(0x1p+1)]; tensor var_5178_cast_fp16 = pow(x = hidden_states_1121_cast_fp16, y = var_78_promoted_98_to_fp16)[name = string("op_5178_cast_fp16")]; tensor variance_197_axes_0 = const()[name = string("variance_197_axes_0"), val = tensor([-1])]; bool variance_197_keep_dims_0 = const()[name = string("variance_197_keep_dims_0"), val = bool(true)]; tensor variance_197_cast_fp16 = reduce_mean(axes = variance_197_axes_0, keep_dims = variance_197_keep_dims_0, x = var_5178_cast_fp16)[name = string("variance_197_cast_fp16")]; fp16 var_5181_to_fp16 = const()[name = string("op_5181_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5182_cast_fp16 = add(x = variance_197_cast_fp16, y = var_5181_to_fp16)[name = string("op_5182_cast_fp16")]; fp32 var_5183_epsilon_0 = const()[name = string("op_5183_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5183_cast_fp16 = rsqrt(epsilon = var_5183_epsilon_0, x = var_5182_cast_fp16)[name = string("op_5183_cast_fp16")]; tensor hidden_states_1125_cast_fp16 = mul(x = hidden_states_1121_cast_fp16, y = var_5183_cast_fp16)[name = string("hidden_states_1125_cast_fp16")]; tensor model_model_layers_24_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_24_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1072573952)))]; tensor var_5186_cast_fp16 = mul(x = model_model_layers_24_self_attn_k_norm_weight_to_fp16, y = hidden_states_1125_cast_fp16)[name = string("op_5186_cast_fp16")]; tensor k_49_perm_0 = const()[name = string("k_49_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_24_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_24_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1072574272)))]; tensor linear_170_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_24_self_attn_v_proj_weight_to_fp16, x = hidden_states_1111_cast_fp16)[name = string("linear_170_cast_fp16")]; tensor concat_459x = const()[name = string("concat_459x"), val = tensor([1, -1, 8, 128])]; tensor var_5191_cast_fp16 = reshape(shape = concat_459x, x = linear_170_cast_fp16)[name = string("op_5191_cast_fp16")]; tensor v_state_49_perm_0 = const()[name = string("v_state_49_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_49_cast_fp16 = transpose(perm = q_49_perm_0, x = var_5170_cast_fp16)[name = string("transpose_15")]; tensor var_5195_cast_fp16 = mul(x = q_49_cast_fp16, y = cos_5_cast_fp16)[name = string("op_5195_cast_fp16")]; tensor x1_97_begin_0 = const()[name = string("x1_97_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_97_end_0 = const()[name = string("x1_97_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_97_end_mask_0 = const()[name = string("x1_97_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_97_cast_fp16 = slice_by_index(begin = x1_97_begin_0, end = x1_97_end_0, end_mask = x1_97_end_mask_0, x = q_49_cast_fp16)[name = string("x1_97_cast_fp16")]; tensor x2_97_begin_0 = const()[name = string("x2_97_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_97_end_0 = const()[name = string("x2_97_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_97_end_mask_0 = const()[name = string("x2_97_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_97_cast_fp16 = slice_by_index(begin = x2_97_begin_0, end = x2_97_end_0, end_mask = x2_97_end_mask_0, x = q_49_cast_fp16)[name = string("x2_97_cast_fp16")]; fp16 const_52_promoted_to_fp16 = const()[name = string("const_52_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5206_cast_fp16 = mul(x = x2_97_cast_fp16, y = const_52_promoted_to_fp16)[name = string("op_5206_cast_fp16")]; bool var_5208_interleave_0 = const()[name = string("op_5208_interleave_0"), val = bool(false)]; tensor var_5208_cast_fp16 = concat(axis = var_72, interleave = var_5208_interleave_0, values = (var_5206_cast_fp16, x1_97_cast_fp16))[name = string("op_5208_cast_fp16")]; tensor var_5209_cast_fp16 = mul(x = var_5208_cast_fp16, y = sin_5_cast_fp16)[name = string("op_5209_cast_fp16")]; tensor query_97_cast_fp16 = add(x = var_5195_cast_fp16, y = var_5209_cast_fp16)[name = string("query_97_cast_fp16")]; tensor k_49_cast_fp16 = transpose(perm = k_49_perm_0, x = var_5186_cast_fp16)[name = string("transpose_14")]; tensor var_5211_cast_fp16 = mul(x = k_49_cast_fp16, y = cos_5_cast_fp16)[name = string("op_5211_cast_fp16")]; tensor x1_99_begin_0 = const()[name = string("x1_99_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_99_end_0 = const()[name = string("x1_99_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_99_end_mask_0 = const()[name = string("x1_99_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_99_cast_fp16 = slice_by_index(begin = x1_99_begin_0, end = x1_99_end_0, end_mask = x1_99_end_mask_0, x = k_49_cast_fp16)[name = string("x1_99_cast_fp16")]; tensor x2_99_begin_0 = const()[name = string("x2_99_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_99_end_0 = const()[name = string("x2_99_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_99_end_mask_0 = const()[name = string("x2_99_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_99_cast_fp16 = slice_by_index(begin = x2_99_begin_0, end = x2_99_end_0, end_mask = x2_99_end_mask_0, x = k_49_cast_fp16)[name = string("x2_99_cast_fp16")]; fp16 const_53_promoted_to_fp16 = const()[name = string("const_53_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5222_cast_fp16 = mul(x = x2_99_cast_fp16, y = const_53_promoted_to_fp16)[name = string("op_5222_cast_fp16")]; bool var_5224_interleave_0 = const()[name = string("op_5224_interleave_0"), val = bool(false)]; tensor var_5224_cast_fp16 = concat(axis = var_72, interleave = var_5224_interleave_0, values = (var_5222_cast_fp16, x1_99_cast_fp16))[name = string("op_5224_cast_fp16")]; tensor var_5225_cast_fp16 = mul(x = var_5224_cast_fp16, y = sin_5_cast_fp16)[name = string("op_5225_cast_fp16")]; tensor k_state_49_cast_fp16 = add(x = var_5211_cast_fp16, y = var_5225_cast_fp16)[name = string("k_state_49_cast_fp16")]; tensor expand_dims_288 = const()[name = string("expand_dims_288"), val = tensor([0])]; tensor expand_dims_289 = const()[name = string("expand_dims_289"), val = tensor([0])]; tensor expand_dims_291 = const()[name = string("expand_dims_291"), val = tensor([0])]; tensor concat_462_values0_0 = const()[name = string("concat_462_values0_0"), val = tensor([24])]; int32 concat_462_axis_0 = const()[name = string("concat_462_axis_0"), val = int32(0)]; bool concat_462_interleave_0 = const()[name = string("concat_462_interleave_0"), val = bool(false)]; tensor concat_462 = concat(axis = concat_462_axis_0, interleave = concat_462_interleave_0, values = (concat_462_values0_0, expand_dims_288, expand_dims_289, expand_dims_2, expand_dims_291))[name = string("concat_462")]; tensor key_cache_internal_tensor_assign_25_stride_0 = const()[name = string("key_cache_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_25_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_25_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_462, begin_mask = key_cache_internal_tensor_assign_25_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_25_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_25_squeeze_mask_0, stride = key_cache_internal_tensor_assign_25_stride_0, update = k_state_49_cast_fp16, x = coreml_update_state_102)[name = string("key_cache_internal_tensor_assign_25_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_25_cast_fp16, input = key_cache)[name = string("coreml_update_state_104_write_state")]; tensor coreml_update_state_104 = read_state(input = key_cache)[name = string("coreml_update_state_104")]; tensor value_cache_internal_tensor_assign_25_stride_0 = const()[name = string("value_cache_internal_tensor_assign_25_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_25_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_25_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_25_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_25_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_25_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_25_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_49_cast_fp16 = transpose(perm = v_state_49_perm_0, x = var_5191_cast_fp16)[name = string("transpose_13")]; tensor value_cache_internal_tensor_assign_25_cast_fp16 = slice_update(begin = concat_462, begin_mask = value_cache_internal_tensor_assign_25_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_25_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_25_squeeze_mask_0, stride = value_cache_internal_tensor_assign_25_stride_0, update = v_state_49_cast_fp16, x = coreml_update_state_103)[name = string("value_cache_internal_tensor_assign_25_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_25_cast_fp16, input = value_cache)[name = string("coreml_update_state_105_write_state")]; tensor coreml_update_state_105 = read_state(input = value_cache)[name = string("coreml_update_state_105")]; tensor var_5248_begin_0 = const()[name = string("op_5248_begin_0"), val = tensor([24, 0, 0, 0, 0])]; tensor var_5248_end_0 = const()[name = string("op_5248_end_0"), val = tensor([25, 1, 8, 2048, 128])]; tensor var_5248_end_mask_0 = const()[name = string("op_5248_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5248_squeeze_mask_0 = const()[name = string("op_5248_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_5248_cast_fp16 = slice_by_index(begin = var_5248_begin_0, end = var_5248_end_0, end_mask = var_5248_end_mask_0, squeeze_mask = var_5248_squeeze_mask_0, x = coreml_update_state_104)[name = string("op_5248_cast_fp16")]; tensor var_5251_begin_0 = const()[name = string("op_5251_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5251_end_mask_0 = const()[name = string("op_5251_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5251_cast_fp16 = slice_by_index(begin = var_5251_begin_0, end = concat_12, end_mask = var_5251_end_mask_0, x = var_5248_cast_fp16)[name = string("op_5251_cast_fp16")]; tensor var_5253_begin_0 = const()[name = string("op_5253_begin_0"), val = tensor([24, 0, 0, 0, 0])]; tensor var_5253_end_0 = const()[name = string("op_5253_end_0"), val = tensor([25, 1, 8, 2048, 128])]; tensor var_5253_end_mask_0 = const()[name = string("op_5253_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5253_squeeze_mask_0 = const()[name = string("op_5253_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_5253_cast_fp16 = slice_by_index(begin = var_5253_begin_0, end = var_5253_end_0, end_mask = var_5253_end_mask_0, squeeze_mask = var_5253_squeeze_mask_0, x = coreml_update_state_105)[name = string("op_5253_cast_fp16")]; tensor var_5256_begin_0 = const()[name = string("op_5256_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5256_end_mask_0 = const()[name = string("op_5256_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5256_cast_fp16 = slice_by_index(begin = var_5256_begin_0, end = concat_12, end_mask = var_5256_end_mask_0, x = var_5253_cast_fp16)[name = string("op_5256_cast_fp16")]; tensor var_5258_shape_cast_fp16 = shape(x = var_5251_cast_fp16)[name = string("op_5258_shape_cast_fp16")]; int32 gather_445 = const()[name = string("gather_445"), val = int32(1)]; int32 gather_446 = const()[name = string("gather_446"), val = int32(8)]; int32 gather_447_axis_0 = const()[name = string("gather_447_axis_0"), val = int32(0)]; int32 gather_447_batch_dims_0 = const()[name = string("gather_447_batch_dims_0"), val = int32(0)]; bool gather_447_validate_indices_0 = const()[name = string("gather_447_validate_indices_0"), val = bool(false)]; string var_5258_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5258_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_447_to_uint16 = const()[name = string("select_447_to_uint16"), val = uint16(2)]; tensor var_5258_shape_cast_fp16_to_uint16 = cast(dtype = var_5258_shape_cast_fp16_to_uint16_dtype_0, x = var_5258_shape_cast_fp16)[name = string("cast_544")]; uint16 gather_447_cast_uint16 = gather(axis = gather_447_axis_0, batch_dims = gather_447_batch_dims_0, indices = select_447_to_uint16, validate_indices = gather_447_validate_indices_0, x = var_5258_shape_cast_fp16_to_uint16)[name = string("gather_447_cast_uint16")]; string gather_447_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_447_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_448 = const()[name = string("gather_448"), val = int32(128)]; tensor var_5265_axes_0 = const()[name = string("op_5265_axes_0"), val = tensor([2])]; tensor var_5265_cast_fp16 = expand_dims(axes = var_5265_axes_0, x = var_5251_cast_fp16)[name = string("op_5265_cast_fp16")]; int32 concat_470_axis_0 = const()[name = string("concat_470_axis_0"), val = int32(0)]; bool concat_470_interleave_0 = const()[name = string("concat_470_interleave_0"), val = bool(false)]; int32 gather_447_cast_uint16_to_int32 = cast(dtype = gather_447_cast_uint16_to_int32_dtype_0, x = gather_447_cast_uint16)[name = string("cast_543")]; tensor concat_470 = concat(axis = concat_470_axis_0, interleave = concat_470_interleave_0, values = (gather_445, gather_446, var_78, gather_447_cast_uint16_to_int32, gather_448))[name = string("concat_470")]; tensor shape_497_cast_fp16 = shape(x = var_5265_cast_fp16)[name = string("shape_497_cast_fp16")]; tensor real_div_48 = real_div(x = concat_470, y = shape_497_cast_fp16)[name = string("real_div_48")]; tensor hidden_states_1131_cast_fp16 = tile(reps = real_div_48, x = var_5265_cast_fp16)[name = string("hidden_states_1131_cast_fp16")]; tensor concat_471x = const()[name = string("concat_471x"), val = tensor([1, 16, -1, 128])]; tensor key_97_cast_fp16 = reshape(shape = concat_471x, x = hidden_states_1131_cast_fp16)[name = string("key_97_cast_fp16")]; tensor var_5275_shape_cast_fp16 = shape(x = var_5256_cast_fp16)[name = string("op_5275_shape_cast_fp16")]; int32 gather_449 = const()[name = string("gather_449"), val = int32(1)]; int32 gather_450 = const()[name = string("gather_450"), val = int32(8)]; int32 gather_451_axis_0 = const()[name = string("gather_451_axis_0"), val = int32(0)]; int32 gather_451_batch_dims_0 = const()[name = string("gather_451_batch_dims_0"), val = int32(0)]; bool gather_451_validate_indices_0 = const()[name = string("gather_451_validate_indices_0"), val = bool(false)]; string var_5275_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5275_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_451_to_uint16 = const()[name = string("select_451_to_uint16"), val = uint16(2)]; tensor var_5275_shape_cast_fp16_to_uint16 = cast(dtype = var_5275_shape_cast_fp16_to_uint16_dtype_0, x = var_5275_shape_cast_fp16)[name = string("cast_542")]; uint16 gather_451_cast_uint16 = gather(axis = gather_451_axis_0, batch_dims = gather_451_batch_dims_0, indices = select_451_to_uint16, validate_indices = gather_451_validate_indices_0, x = var_5275_shape_cast_fp16_to_uint16)[name = string("gather_451_cast_uint16")]; string gather_451_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_451_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_452 = const()[name = string("gather_452"), val = int32(128)]; tensor var_5282_axes_0 = const()[name = string("op_5282_axes_0"), val = tensor([2])]; tensor var_5282_cast_fp16 = expand_dims(axes = var_5282_axes_0, x = var_5256_cast_fp16)[name = string("op_5282_cast_fp16")]; int32 concat_472_axis_0 = const()[name = string("concat_472_axis_0"), val = int32(0)]; bool concat_472_interleave_0 = const()[name = string("concat_472_interleave_0"), val = bool(false)]; int32 gather_451_cast_uint16_to_int32 = cast(dtype = gather_451_cast_uint16_to_int32_dtype_0, x = gather_451_cast_uint16)[name = string("cast_541")]; tensor concat_472 = concat(axis = concat_472_axis_0, interleave = concat_472_interleave_0, values = (gather_449, gather_450, var_78, gather_451_cast_uint16_to_int32, gather_452))[name = string("concat_472")]; tensor shape_502_cast_fp16 = shape(x = var_5282_cast_fp16)[name = string("shape_502_cast_fp16")]; tensor real_div_49 = real_div(x = concat_472, y = shape_502_cast_fp16)[name = string("real_div_49")]; tensor hidden_states_1135_cast_fp16 = tile(reps = real_div_49, x = var_5282_cast_fp16)[name = string("hidden_states_1135_cast_fp16")]; tensor concat_473x = const()[name = string("concat_473x"), val = tensor([1, 16, -1, 128])]; tensor value_97_cast_fp16 = reshape(shape = concat_473x, x = hidden_states_1135_cast_fp16)[name = string("value_97_cast_fp16")]; tensor var_5292_shape_cast_fp16 = shape(x = key_97_cast_fp16)[name = string("op_5292_shape_cast_fp16")]; int32 gather_453_axis_0 = const()[name = string("gather_453_axis_0"), val = int32(0)]; int32 gather_453_batch_dims_0 = const()[name = string("gather_453_batch_dims_0"), val = int32(0)]; bool gather_453_validate_indices_0 = const()[name = string("gather_453_validate_indices_0"), val = bool(false)]; string var_5292_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5292_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_453_to_uint16 = const()[name = string("select_453_to_uint16"), val = uint16(2)]; tensor var_5292_shape_cast_fp16_to_uint16 = cast(dtype = var_5292_shape_cast_fp16_to_uint16_dtype_0, x = var_5292_shape_cast_fp16)[name = string("cast_540")]; uint16 gather_453_cast_uint16 = gather(axis = gather_453_axis_0, batch_dims = gather_453_batch_dims_0, indices = select_453_to_uint16, validate_indices = gather_453_validate_indices_0, x = var_5292_shape_cast_fp16_to_uint16)[name = string("gather_453_cast_uint16")]; string gather_453_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_453_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_474_values0_0 = const()[name = string("concat_474_values0_0"), val = int32(1)]; int32 concat_474_values1_0 = const()[name = string("concat_474_values1_0"), val = int32(1)]; int32 concat_474_values2_0 = const()[name = string("concat_474_values2_0"), val = int32(0)]; int32 concat_474_axis_0 = const()[name = string("concat_474_axis_0"), val = int32(0)]; bool concat_474_interleave_0 = const()[name = string("concat_474_interleave_0"), val = bool(false)]; int32 gather_453_cast_uint16_to_int32 = cast(dtype = gather_453_cast_uint16_to_int32_dtype_0, x = gather_453_cast_uint16)[name = string("cast_539")]; tensor concat_474 = concat(axis = concat_474_axis_0, interleave = concat_474_interleave_0, values = (concat_474_values0_0, concat_474_values1_0, concat_474_values2_0, gather_453_cast_uint16_to_int32))[name = string("concat_474")]; tensor attention_mask_49_begin_0 = const()[name = string("attention_mask_49_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_49_end_mask_0 = const()[name = string("attention_mask_49_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_49_cast_fp16 = slice_by_index(begin = attention_mask_49_begin_0, end = concat_474, end_mask = attention_mask_49_end_mask_0, x = causal_mask)[name = string("attention_mask_49_cast_fp16")]; tensor mul_24_cast_fp16 = mul(x = query_97_cast_fp16, y = var_85_to_fp16)[name = string("mul_24_cast_fp16")]; bool matmul_24_transpose_y_0 = const()[name = string("matmul_24_transpose_y_0"), val = bool(true)]; bool matmul_24_transpose_x_0 = const()[name = string("matmul_24_transpose_x_0"), val = bool(false)]; tensor matmul_24_cast_fp16 = matmul(transpose_x = matmul_24_transpose_x_0, transpose_y = matmul_24_transpose_y_0, x = mul_24_cast_fp16, y = key_97_cast_fp16)[name = string("matmul_24_cast_fp16")]; tensor add_478_cast_fp16 = add(x = matmul_24_cast_fp16, y = attention_mask_49_cast_fp16)[name = string("add_478_cast_fp16")]; int32 softmax_24_axis_0 = const()[name = string("softmax_24_axis_0"), val = int32(-1)]; tensor softmax_24_cast_fp16 = softmax(axis = softmax_24_axis_0, x = add_478_cast_fp16)[name = string("softmax_24_cast_fp16")]; bool attn_output_97_transpose_x_0 = const()[name = string("attn_output_97_transpose_x_0"), val = bool(false)]; bool attn_output_97_transpose_y_0 = const()[name = string("attn_output_97_transpose_y_0"), val = bool(false)]; tensor attn_output_97_cast_fp16 = matmul(transpose_x = attn_output_97_transpose_x_0, transpose_y = attn_output_97_transpose_y_0, x = softmax_24_cast_fp16, y = value_97_cast_fp16)[name = string("attn_output_97_cast_fp16")]; tensor var_5301_perm_0 = const()[name = string("op_5301_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_475_axis_0 = const()[name = string("concat_475_axis_0"), val = int32(0)]; bool concat_475_interleave_0 = const()[name = string("concat_475_interleave_0"), val = bool(false)]; int32 gather_437_cast_uint16_to_int32 = cast(dtype = gather_437_cast_uint16_to_int32_dtype_0, x = gather_437_cast_uint16)[name = string("cast_545")]; tensor concat_475 = concat(axis = concat_475_axis_0, interleave = concat_475_interleave_0, values = (gather_436, gather_437_cast_uint16_to_int32, var_72))[name = string("concat_475")]; tensor var_5301_cast_fp16 = transpose(perm = var_5301_perm_0, x = attn_output_97_cast_fp16)[name = string("transpose_12")]; tensor var_5304_cast_fp16 = reshape(shape = concat_475, x = var_5301_cast_fp16)[name = string("op_5304_cast_fp16")]; tensor model_model_layers_24_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_24_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1074671488)))]; tensor linear_171_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_24_self_attn_o_proj_weight_to_fp16, x = var_5304_cast_fp16)[name = string("linear_171_cast_fp16")]; tensor hidden_states_1139_cast_fp16 = add(x = hidden_states_1103_cast_fp16, y = linear_171_cast_fp16)[name = string("hidden_states_1139_cast_fp16")]; fp16 var_78_promoted_99_to_fp16 = const()[name = string("op_78_promoted_99_to_fp16"), val = fp16(0x1p+1)]; tensor var_5311_cast_fp16 = pow(x = hidden_states_1139_cast_fp16, y = var_78_promoted_99_to_fp16)[name = string("op_5311_cast_fp16")]; tensor variance_199_axes_0 = const()[name = string("variance_199_axes_0"), val = tensor([-1])]; bool variance_199_keep_dims_0 = const()[name = string("variance_199_keep_dims_0"), val = bool(true)]; tensor variance_199_cast_fp16 = reduce_mean(axes = variance_199_axes_0, keep_dims = variance_199_keep_dims_0, x = var_5311_cast_fp16)[name = string("variance_199_cast_fp16")]; fp16 var_5314_to_fp16 = const()[name = string("op_5314_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5315_cast_fp16 = add(x = variance_199_cast_fp16, y = var_5314_to_fp16)[name = string("op_5315_cast_fp16")]; fp32 var_5316_epsilon_0 = const()[name = string("op_5316_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5316_cast_fp16 = rsqrt(epsilon = var_5316_epsilon_0, x = var_5315_cast_fp16)[name = string("op_5316_cast_fp16")]; tensor hidden_states_1143_cast_fp16 = mul(x = hidden_states_1139_cast_fp16, y = var_5316_cast_fp16)[name = string("hidden_states_1143_cast_fp16")]; tensor model_model_layers_24_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_24_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1078865856)))]; tensor input_195_cast_fp16 = mul(x = model_model_layers_24_post_attention_layernorm_weight_to_fp16, y = hidden_states_1143_cast_fp16)[name = string("input_195_cast_fp16")]; tensor model_model_layers_24_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_24_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1078867968)))]; tensor linear_172_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_24_mlp_gate_proj_weight_to_fp16, x = input_195_cast_fp16)[name = string("linear_172_cast_fp16")]; tensor var_5328_cast_fp16 = silu(x = linear_172_cast_fp16)[name = string("op_5328_cast_fp16")]; tensor model_model_layers_24_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_24_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1085159488)))]; tensor linear_173_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_24_mlp_up_proj_weight_to_fp16, x = input_195_cast_fp16)[name = string("linear_173_cast_fp16")]; tensor input_199_cast_fp16 = mul(x = var_5328_cast_fp16, y = linear_173_cast_fp16)[name = string("input_199_cast_fp16")]; tensor model_model_layers_24_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_24_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1091451008)))]; tensor linear_174_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_24_mlp_down_proj_weight_to_fp16, x = input_199_cast_fp16)[name = string("linear_174_cast_fp16")]; tensor hidden_states_1149_cast_fp16 = add(x = hidden_states_1139_cast_fp16, y = linear_174_cast_fp16)[name = string("hidden_states_1149_cast_fp16")]; fp16 var_78_promoted_100_to_fp16 = const()[name = string("op_78_promoted_100_to_fp16"), val = fp16(0x1p+1)]; tensor var_5341_cast_fp16 = pow(x = hidden_states_1149_cast_fp16, y = var_78_promoted_100_to_fp16)[name = string("op_5341_cast_fp16")]; tensor variance_201_axes_0 = const()[name = string("variance_201_axes_0"), val = tensor([-1])]; bool variance_201_keep_dims_0 = const()[name = string("variance_201_keep_dims_0"), val = bool(true)]; tensor variance_201_cast_fp16 = reduce_mean(axes = variance_201_axes_0, keep_dims = variance_201_keep_dims_0, x = var_5341_cast_fp16)[name = string("variance_201_cast_fp16")]; fp16 var_5344_to_fp16 = const()[name = string("op_5344_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5345_cast_fp16 = add(x = variance_201_cast_fp16, y = var_5344_to_fp16)[name = string("op_5345_cast_fp16")]; fp32 var_5346_epsilon_0 = const()[name = string("op_5346_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5346_cast_fp16 = rsqrt(epsilon = var_5346_epsilon_0, x = var_5345_cast_fp16)[name = string("op_5346_cast_fp16")]; tensor hidden_states_1153_cast_fp16 = mul(x = hidden_states_1149_cast_fp16, y = var_5346_cast_fp16)[name = string("hidden_states_1153_cast_fp16")]; tensor model_model_layers_25_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_25_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1097742528)))]; tensor hidden_states_1157_cast_fp16 = mul(x = model_model_layers_25_input_layernorm_weight_to_fp16, y = hidden_states_1153_cast_fp16)[name = string("hidden_states_1157_cast_fp16")]; tensor var_5359_shape_cast_fp16 = shape(x = hidden_states_1157_cast_fp16)[name = string("op_5359_shape_cast_fp16")]; int32 gather_454 = const()[name = string("gather_454"), val = int32(1)]; int32 gather_455_axis_0 = const()[name = string("gather_455_axis_0"), val = int32(0)]; int32 gather_455_batch_dims_0 = const()[name = string("gather_455_batch_dims_0"), val = int32(0)]; bool gather_455_validate_indices_0 = const()[name = string("gather_455_validate_indices_0"), val = bool(false)]; string var_5359_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5359_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_455_to_uint16 = const()[name = string("select_455_to_uint16"), val = uint16(1)]; tensor var_5359_shape_cast_fp16_to_uint16 = cast(dtype = var_5359_shape_cast_fp16_to_uint16_dtype_0, x = var_5359_shape_cast_fp16)[name = string("cast_538")]; uint16 gather_455_cast_uint16 = gather(axis = gather_455_axis_0, batch_dims = gather_455_batch_dims_0, indices = select_455_to_uint16, validate_indices = gather_455_validate_indices_0, x = var_5359_shape_cast_fp16_to_uint16)[name = string("gather_455_cast_uint16")]; string gather_455_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_455_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_25_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_25_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1097744640)))]; tensor linear_175_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_25_self_attn_q_proj_weight_to_fp16, x = hidden_states_1157_cast_fp16)[name = string("linear_175_cast_fp16")]; tensor concat_476x = const()[name = string("concat_476x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_1159_cast_fp16 = reshape(shape = concat_476x, x = linear_175_cast_fp16)[name = string("hidden_states_1159_cast_fp16")]; fp16 var_78_promoted_101_to_fp16 = const()[name = string("op_78_promoted_101_to_fp16"), val = fp16(0x1p+1)]; tensor var_5367_cast_fp16 = pow(x = hidden_states_1159_cast_fp16, y = var_78_promoted_101_to_fp16)[name = string("op_5367_cast_fp16")]; tensor variance_203_axes_0 = const()[name = string("variance_203_axes_0"), val = tensor([-1])]; bool variance_203_keep_dims_0 = const()[name = string("variance_203_keep_dims_0"), val = bool(true)]; tensor variance_203_cast_fp16 = reduce_mean(axes = variance_203_axes_0, keep_dims = variance_203_keep_dims_0, x = var_5367_cast_fp16)[name = string("variance_203_cast_fp16")]; fp16 var_5370_to_fp16 = const()[name = string("op_5370_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5371_cast_fp16 = add(x = variance_203_cast_fp16, y = var_5370_to_fp16)[name = string("op_5371_cast_fp16")]; fp32 var_5372_epsilon_0 = const()[name = string("op_5372_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5372_cast_fp16 = rsqrt(epsilon = var_5372_epsilon_0, x = var_5371_cast_fp16)[name = string("op_5372_cast_fp16")]; tensor hidden_states_1163_cast_fp16 = mul(x = hidden_states_1159_cast_fp16, y = var_5372_cast_fp16)[name = string("hidden_states_1163_cast_fp16")]; tensor model_model_layers_25_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_25_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1101939008)))]; tensor var_5375_cast_fp16 = mul(x = model_model_layers_25_self_attn_q_norm_weight_to_fp16, y = hidden_states_1163_cast_fp16)[name = string("op_5375_cast_fp16")]; tensor q_51_perm_0 = const()[name = string("q_51_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_25_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_25_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1101939328)))]; tensor linear_176_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_25_self_attn_k_proj_weight_to_fp16, x = hidden_states_1157_cast_fp16)[name = string("linear_176_cast_fp16")]; tensor concat_477x = const()[name = string("concat_477x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_1167_cast_fp16 = reshape(shape = concat_477x, x = linear_176_cast_fp16)[name = string("hidden_states_1167_cast_fp16")]; fp16 var_78_promoted_102_to_fp16 = const()[name = string("op_78_promoted_102_to_fp16"), val = fp16(0x1p+1)]; tensor var_5383_cast_fp16 = pow(x = hidden_states_1167_cast_fp16, y = var_78_promoted_102_to_fp16)[name = string("op_5383_cast_fp16")]; tensor variance_205_axes_0 = const()[name = string("variance_205_axes_0"), val = tensor([-1])]; bool variance_205_keep_dims_0 = const()[name = string("variance_205_keep_dims_0"), val = bool(true)]; tensor variance_205_cast_fp16 = reduce_mean(axes = variance_205_axes_0, keep_dims = variance_205_keep_dims_0, x = var_5383_cast_fp16)[name = string("variance_205_cast_fp16")]; fp16 var_5386_to_fp16 = const()[name = string("op_5386_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5387_cast_fp16 = add(x = variance_205_cast_fp16, y = var_5386_to_fp16)[name = string("op_5387_cast_fp16")]; fp32 var_5388_epsilon_0 = const()[name = string("op_5388_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5388_cast_fp16 = rsqrt(epsilon = var_5388_epsilon_0, x = var_5387_cast_fp16)[name = string("op_5388_cast_fp16")]; tensor hidden_states_1171_cast_fp16 = mul(x = hidden_states_1167_cast_fp16, y = var_5388_cast_fp16)[name = string("hidden_states_1171_cast_fp16")]; tensor model_model_layers_25_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_25_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1104036544)))]; tensor var_5391_cast_fp16 = mul(x = model_model_layers_25_self_attn_k_norm_weight_to_fp16, y = hidden_states_1171_cast_fp16)[name = string("op_5391_cast_fp16")]; tensor k_51_perm_0 = const()[name = string("k_51_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_25_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_25_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1104036864)))]; tensor linear_177_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_25_self_attn_v_proj_weight_to_fp16, x = hidden_states_1157_cast_fp16)[name = string("linear_177_cast_fp16")]; tensor concat_478x = const()[name = string("concat_478x"), val = tensor([1, -1, 8, 128])]; tensor var_5396_cast_fp16 = reshape(shape = concat_478x, x = linear_177_cast_fp16)[name = string("op_5396_cast_fp16")]; tensor v_state_51_perm_0 = const()[name = string("v_state_51_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_51_cast_fp16 = transpose(perm = q_51_perm_0, x = var_5375_cast_fp16)[name = string("transpose_11")]; tensor var_5400_cast_fp16 = mul(x = q_51_cast_fp16, y = cos_5_cast_fp16)[name = string("op_5400_cast_fp16")]; tensor x1_101_begin_0 = const()[name = string("x1_101_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_101_end_0 = const()[name = string("x1_101_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_101_end_mask_0 = const()[name = string("x1_101_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_101_cast_fp16 = slice_by_index(begin = x1_101_begin_0, end = x1_101_end_0, end_mask = x1_101_end_mask_0, x = q_51_cast_fp16)[name = string("x1_101_cast_fp16")]; tensor x2_101_begin_0 = const()[name = string("x2_101_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_101_end_0 = const()[name = string("x2_101_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_101_end_mask_0 = const()[name = string("x2_101_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_101_cast_fp16 = slice_by_index(begin = x2_101_begin_0, end = x2_101_end_0, end_mask = x2_101_end_mask_0, x = q_51_cast_fp16)[name = string("x2_101_cast_fp16")]; fp16 const_54_promoted_to_fp16 = const()[name = string("const_54_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5411_cast_fp16 = mul(x = x2_101_cast_fp16, y = const_54_promoted_to_fp16)[name = string("op_5411_cast_fp16")]; bool var_5413_interleave_0 = const()[name = string("op_5413_interleave_0"), val = bool(false)]; tensor var_5413_cast_fp16 = concat(axis = var_72, interleave = var_5413_interleave_0, values = (var_5411_cast_fp16, x1_101_cast_fp16))[name = string("op_5413_cast_fp16")]; tensor var_5414_cast_fp16 = mul(x = var_5413_cast_fp16, y = sin_5_cast_fp16)[name = string("op_5414_cast_fp16")]; tensor query_101_cast_fp16 = add(x = var_5400_cast_fp16, y = var_5414_cast_fp16)[name = string("query_101_cast_fp16")]; tensor k_51_cast_fp16 = transpose(perm = k_51_perm_0, x = var_5391_cast_fp16)[name = string("transpose_10")]; tensor var_5416_cast_fp16 = mul(x = k_51_cast_fp16, y = cos_5_cast_fp16)[name = string("op_5416_cast_fp16")]; tensor x1_103_begin_0 = const()[name = string("x1_103_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_103_end_0 = const()[name = string("x1_103_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_103_end_mask_0 = const()[name = string("x1_103_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_103_cast_fp16 = slice_by_index(begin = x1_103_begin_0, end = x1_103_end_0, end_mask = x1_103_end_mask_0, x = k_51_cast_fp16)[name = string("x1_103_cast_fp16")]; tensor x2_103_begin_0 = const()[name = string("x2_103_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_103_end_0 = const()[name = string("x2_103_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_103_end_mask_0 = const()[name = string("x2_103_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_103_cast_fp16 = slice_by_index(begin = x2_103_begin_0, end = x2_103_end_0, end_mask = x2_103_end_mask_0, x = k_51_cast_fp16)[name = string("x2_103_cast_fp16")]; fp16 const_55_promoted_to_fp16 = const()[name = string("const_55_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5427_cast_fp16 = mul(x = x2_103_cast_fp16, y = const_55_promoted_to_fp16)[name = string("op_5427_cast_fp16")]; bool var_5429_interleave_0 = const()[name = string("op_5429_interleave_0"), val = bool(false)]; tensor var_5429_cast_fp16 = concat(axis = var_72, interleave = var_5429_interleave_0, values = (var_5427_cast_fp16, x1_103_cast_fp16))[name = string("op_5429_cast_fp16")]; tensor var_5430_cast_fp16 = mul(x = var_5429_cast_fp16, y = sin_5_cast_fp16)[name = string("op_5430_cast_fp16")]; tensor k_state_51_cast_fp16 = add(x = var_5416_cast_fp16, y = var_5430_cast_fp16)[name = string("k_state_51_cast_fp16")]; tensor expand_dims_300 = const()[name = string("expand_dims_300"), val = tensor([0])]; tensor expand_dims_301 = const()[name = string("expand_dims_301"), val = tensor([0])]; tensor expand_dims_303 = const()[name = string("expand_dims_303"), val = tensor([0])]; tensor concat_481_values0_0 = const()[name = string("concat_481_values0_0"), val = tensor([25])]; int32 concat_481_axis_0 = const()[name = string("concat_481_axis_0"), val = int32(0)]; bool concat_481_interleave_0 = const()[name = string("concat_481_interleave_0"), val = bool(false)]; tensor concat_481 = concat(axis = concat_481_axis_0, interleave = concat_481_interleave_0, values = (concat_481_values0_0, expand_dims_300, expand_dims_301, expand_dims_2, expand_dims_303))[name = string("concat_481")]; tensor key_cache_internal_tensor_assign_26_stride_0 = const()[name = string("key_cache_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_26_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_26_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_481, begin_mask = key_cache_internal_tensor_assign_26_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_26_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_26_squeeze_mask_0, stride = key_cache_internal_tensor_assign_26_stride_0, update = k_state_51_cast_fp16, x = coreml_update_state_104)[name = string("key_cache_internal_tensor_assign_26_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_26_cast_fp16, input = key_cache)[name = string("coreml_update_state_106_write_state")]; tensor coreml_update_state_106 = read_state(input = key_cache)[name = string("coreml_update_state_106")]; tensor value_cache_internal_tensor_assign_26_stride_0 = const()[name = string("value_cache_internal_tensor_assign_26_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_26_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_26_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_26_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_26_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_26_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_26_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_51_cast_fp16 = transpose(perm = v_state_51_perm_0, x = var_5396_cast_fp16)[name = string("transpose_9")]; tensor value_cache_internal_tensor_assign_26_cast_fp16 = slice_update(begin = concat_481, begin_mask = value_cache_internal_tensor_assign_26_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_26_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_26_squeeze_mask_0, stride = value_cache_internal_tensor_assign_26_stride_0, update = v_state_51_cast_fp16, x = coreml_update_state_105)[name = string("value_cache_internal_tensor_assign_26_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_26_cast_fp16, input = value_cache)[name = string("coreml_update_state_107_write_state")]; tensor coreml_update_state_107 = read_state(input = value_cache)[name = string("coreml_update_state_107")]; tensor var_5453_begin_0 = const()[name = string("op_5453_begin_0"), val = tensor([25, 0, 0, 0, 0])]; tensor var_5453_end_0 = const()[name = string("op_5453_end_0"), val = tensor([26, 1, 8, 2048, 128])]; tensor var_5453_end_mask_0 = const()[name = string("op_5453_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5453_squeeze_mask_0 = const()[name = string("op_5453_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_5453_cast_fp16 = slice_by_index(begin = var_5453_begin_0, end = var_5453_end_0, end_mask = var_5453_end_mask_0, squeeze_mask = var_5453_squeeze_mask_0, x = coreml_update_state_106)[name = string("op_5453_cast_fp16")]; tensor var_5456_begin_0 = const()[name = string("op_5456_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5456_end_mask_0 = const()[name = string("op_5456_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5456_cast_fp16 = slice_by_index(begin = var_5456_begin_0, end = concat_12, end_mask = var_5456_end_mask_0, x = var_5453_cast_fp16)[name = string("op_5456_cast_fp16")]; tensor var_5458_begin_0 = const()[name = string("op_5458_begin_0"), val = tensor([25, 0, 0, 0, 0])]; tensor var_5458_end_0 = const()[name = string("op_5458_end_0"), val = tensor([26, 1, 8, 2048, 128])]; tensor var_5458_end_mask_0 = const()[name = string("op_5458_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5458_squeeze_mask_0 = const()[name = string("op_5458_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_5458_cast_fp16 = slice_by_index(begin = var_5458_begin_0, end = var_5458_end_0, end_mask = var_5458_end_mask_0, squeeze_mask = var_5458_squeeze_mask_0, x = coreml_update_state_107)[name = string("op_5458_cast_fp16")]; tensor var_5461_begin_0 = const()[name = string("op_5461_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5461_end_mask_0 = const()[name = string("op_5461_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5461_cast_fp16 = slice_by_index(begin = var_5461_begin_0, end = concat_12, end_mask = var_5461_end_mask_0, x = var_5458_cast_fp16)[name = string("op_5461_cast_fp16")]; tensor var_5463_shape_cast_fp16 = shape(x = var_5456_cast_fp16)[name = string("op_5463_shape_cast_fp16")]; int32 gather_463 = const()[name = string("gather_463"), val = int32(1)]; int32 gather_464 = const()[name = string("gather_464"), val = int32(8)]; int32 gather_465_axis_0 = const()[name = string("gather_465_axis_0"), val = int32(0)]; int32 gather_465_batch_dims_0 = const()[name = string("gather_465_batch_dims_0"), val = int32(0)]; bool gather_465_validate_indices_0 = const()[name = string("gather_465_validate_indices_0"), val = bool(false)]; string var_5463_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5463_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_465_to_uint16 = const()[name = string("select_465_to_uint16"), val = uint16(2)]; tensor var_5463_shape_cast_fp16_to_uint16 = cast(dtype = var_5463_shape_cast_fp16_to_uint16_dtype_0, x = var_5463_shape_cast_fp16)[name = string("cast_536")]; uint16 gather_465_cast_uint16 = gather(axis = gather_465_axis_0, batch_dims = gather_465_batch_dims_0, indices = select_465_to_uint16, validate_indices = gather_465_validate_indices_0, x = var_5463_shape_cast_fp16_to_uint16)[name = string("gather_465_cast_uint16")]; string gather_465_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_465_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_466 = const()[name = string("gather_466"), val = int32(128)]; tensor var_5470_axes_0 = const()[name = string("op_5470_axes_0"), val = tensor([2])]; tensor var_5470_cast_fp16 = expand_dims(axes = var_5470_axes_0, x = var_5456_cast_fp16)[name = string("op_5470_cast_fp16")]; int32 concat_489_axis_0 = const()[name = string("concat_489_axis_0"), val = int32(0)]; bool concat_489_interleave_0 = const()[name = string("concat_489_interleave_0"), val = bool(false)]; int32 gather_465_cast_uint16_to_int32 = cast(dtype = gather_465_cast_uint16_to_int32_dtype_0, x = gather_465_cast_uint16)[name = string("cast_535")]; tensor concat_489 = concat(axis = concat_489_axis_0, interleave = concat_489_interleave_0, values = (gather_463, gather_464, var_78, gather_465_cast_uint16_to_int32, gather_466))[name = string("concat_489")]; tensor shape_517_cast_fp16 = shape(x = var_5470_cast_fp16)[name = string("shape_517_cast_fp16")]; tensor real_div_50 = real_div(x = concat_489, y = shape_517_cast_fp16)[name = string("real_div_50")]; tensor hidden_states_1177_cast_fp16 = tile(reps = real_div_50, x = var_5470_cast_fp16)[name = string("hidden_states_1177_cast_fp16")]; tensor concat_490x = const()[name = string("concat_490x"), val = tensor([1, 16, -1, 128])]; tensor key_101_cast_fp16 = reshape(shape = concat_490x, x = hidden_states_1177_cast_fp16)[name = string("key_101_cast_fp16")]; tensor var_5480_shape_cast_fp16 = shape(x = var_5461_cast_fp16)[name = string("op_5480_shape_cast_fp16")]; int32 gather_467 = const()[name = string("gather_467"), val = int32(1)]; int32 gather_468 = const()[name = string("gather_468"), val = int32(8)]; int32 gather_469_axis_0 = const()[name = string("gather_469_axis_0"), val = int32(0)]; int32 gather_469_batch_dims_0 = const()[name = string("gather_469_batch_dims_0"), val = int32(0)]; bool gather_469_validate_indices_0 = const()[name = string("gather_469_validate_indices_0"), val = bool(false)]; string var_5480_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5480_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_469_to_uint16 = const()[name = string("select_469_to_uint16"), val = uint16(2)]; tensor var_5480_shape_cast_fp16_to_uint16 = cast(dtype = var_5480_shape_cast_fp16_to_uint16_dtype_0, x = var_5480_shape_cast_fp16)[name = string("cast_534")]; uint16 gather_469_cast_uint16 = gather(axis = gather_469_axis_0, batch_dims = gather_469_batch_dims_0, indices = select_469_to_uint16, validate_indices = gather_469_validate_indices_0, x = var_5480_shape_cast_fp16_to_uint16)[name = string("gather_469_cast_uint16")]; string gather_469_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_469_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_470 = const()[name = string("gather_470"), val = int32(128)]; tensor var_5487_axes_0 = const()[name = string("op_5487_axes_0"), val = tensor([2])]; tensor var_5487_cast_fp16 = expand_dims(axes = var_5487_axes_0, x = var_5461_cast_fp16)[name = string("op_5487_cast_fp16")]; int32 concat_491_axis_0 = const()[name = string("concat_491_axis_0"), val = int32(0)]; bool concat_491_interleave_0 = const()[name = string("concat_491_interleave_0"), val = bool(false)]; int32 gather_469_cast_uint16_to_int32 = cast(dtype = gather_469_cast_uint16_to_int32_dtype_0, x = gather_469_cast_uint16)[name = string("cast_533")]; tensor concat_491 = concat(axis = concat_491_axis_0, interleave = concat_491_interleave_0, values = (gather_467, gather_468, var_78, gather_469_cast_uint16_to_int32, gather_470))[name = string("concat_491")]; tensor shape_522_cast_fp16 = shape(x = var_5487_cast_fp16)[name = string("shape_522_cast_fp16")]; tensor real_div_51 = real_div(x = concat_491, y = shape_522_cast_fp16)[name = string("real_div_51")]; tensor hidden_states_1181_cast_fp16 = tile(reps = real_div_51, x = var_5487_cast_fp16)[name = string("hidden_states_1181_cast_fp16")]; tensor concat_492x = const()[name = string("concat_492x"), val = tensor([1, 16, -1, 128])]; tensor value_101_cast_fp16 = reshape(shape = concat_492x, x = hidden_states_1181_cast_fp16)[name = string("value_101_cast_fp16")]; tensor var_5497_shape_cast_fp16 = shape(x = key_101_cast_fp16)[name = string("op_5497_shape_cast_fp16")]; int32 gather_471_axis_0 = const()[name = string("gather_471_axis_0"), val = int32(0)]; int32 gather_471_batch_dims_0 = const()[name = string("gather_471_batch_dims_0"), val = int32(0)]; bool gather_471_validate_indices_0 = const()[name = string("gather_471_validate_indices_0"), val = bool(false)]; string var_5497_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5497_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_471_to_uint16 = const()[name = string("select_471_to_uint16"), val = uint16(2)]; tensor var_5497_shape_cast_fp16_to_uint16 = cast(dtype = var_5497_shape_cast_fp16_to_uint16_dtype_0, x = var_5497_shape_cast_fp16)[name = string("cast_532")]; uint16 gather_471_cast_uint16 = gather(axis = gather_471_axis_0, batch_dims = gather_471_batch_dims_0, indices = select_471_to_uint16, validate_indices = gather_471_validate_indices_0, x = var_5497_shape_cast_fp16_to_uint16)[name = string("gather_471_cast_uint16")]; string gather_471_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_471_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_493_values0_0 = const()[name = string("concat_493_values0_0"), val = int32(1)]; int32 concat_493_values1_0 = const()[name = string("concat_493_values1_0"), val = int32(1)]; int32 concat_493_values2_0 = const()[name = string("concat_493_values2_0"), val = int32(0)]; int32 concat_493_axis_0 = const()[name = string("concat_493_axis_0"), val = int32(0)]; bool concat_493_interleave_0 = const()[name = string("concat_493_interleave_0"), val = bool(false)]; int32 gather_471_cast_uint16_to_int32 = cast(dtype = gather_471_cast_uint16_to_int32_dtype_0, x = gather_471_cast_uint16)[name = string("cast_531")]; tensor concat_493 = concat(axis = concat_493_axis_0, interleave = concat_493_interleave_0, values = (concat_493_values0_0, concat_493_values1_0, concat_493_values2_0, gather_471_cast_uint16_to_int32))[name = string("concat_493")]; tensor attention_mask_51_begin_0 = const()[name = string("attention_mask_51_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_51_end_mask_0 = const()[name = string("attention_mask_51_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_51_cast_fp16 = slice_by_index(begin = attention_mask_51_begin_0, end = concat_493, end_mask = attention_mask_51_end_mask_0, x = causal_mask)[name = string("attention_mask_51_cast_fp16")]; tensor mul_25_cast_fp16 = mul(x = query_101_cast_fp16, y = var_85_to_fp16)[name = string("mul_25_cast_fp16")]; bool matmul_25_transpose_y_0 = const()[name = string("matmul_25_transpose_y_0"), val = bool(true)]; bool matmul_25_transpose_x_0 = const()[name = string("matmul_25_transpose_x_0"), val = bool(false)]; tensor matmul_25_cast_fp16 = matmul(transpose_x = matmul_25_transpose_x_0, transpose_y = matmul_25_transpose_y_0, x = mul_25_cast_fp16, y = key_101_cast_fp16)[name = string("matmul_25_cast_fp16")]; tensor add_497_cast_fp16 = add(x = matmul_25_cast_fp16, y = attention_mask_51_cast_fp16)[name = string("add_497_cast_fp16")]; int32 softmax_25_axis_0 = const()[name = string("softmax_25_axis_0"), val = int32(-1)]; tensor softmax_25_cast_fp16 = softmax(axis = softmax_25_axis_0, x = add_497_cast_fp16)[name = string("softmax_25_cast_fp16")]; bool attn_output_101_transpose_x_0 = const()[name = string("attn_output_101_transpose_x_0"), val = bool(false)]; bool attn_output_101_transpose_y_0 = const()[name = string("attn_output_101_transpose_y_0"), val = bool(false)]; tensor attn_output_101_cast_fp16 = matmul(transpose_x = attn_output_101_transpose_x_0, transpose_y = attn_output_101_transpose_y_0, x = softmax_25_cast_fp16, y = value_101_cast_fp16)[name = string("attn_output_101_cast_fp16")]; tensor var_5506_perm_0 = const()[name = string("op_5506_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_494_axis_0 = const()[name = string("concat_494_axis_0"), val = int32(0)]; bool concat_494_interleave_0 = const()[name = string("concat_494_interleave_0"), val = bool(false)]; int32 gather_455_cast_uint16_to_int32 = cast(dtype = gather_455_cast_uint16_to_int32_dtype_0, x = gather_455_cast_uint16)[name = string("cast_537")]; tensor concat_494 = concat(axis = concat_494_axis_0, interleave = concat_494_interleave_0, values = (gather_454, gather_455_cast_uint16_to_int32, var_72))[name = string("concat_494")]; tensor var_5506_cast_fp16 = transpose(perm = var_5506_perm_0, x = attn_output_101_cast_fp16)[name = string("transpose_8")]; tensor var_5509_cast_fp16 = reshape(shape = concat_494, x = var_5506_cast_fp16)[name = string("op_5509_cast_fp16")]; tensor model_model_layers_25_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_25_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1106134080)))]; tensor linear_178_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_25_self_attn_o_proj_weight_to_fp16, x = var_5509_cast_fp16)[name = string("linear_178_cast_fp16")]; tensor hidden_states_1185_cast_fp16 = add(x = hidden_states_1149_cast_fp16, y = linear_178_cast_fp16)[name = string("hidden_states_1185_cast_fp16")]; fp16 var_78_promoted_103_to_fp16 = const()[name = string("op_78_promoted_103_to_fp16"), val = fp16(0x1p+1)]; tensor var_5516_cast_fp16 = pow(x = hidden_states_1185_cast_fp16, y = var_78_promoted_103_to_fp16)[name = string("op_5516_cast_fp16")]; tensor variance_207_axes_0 = const()[name = string("variance_207_axes_0"), val = tensor([-1])]; bool variance_207_keep_dims_0 = const()[name = string("variance_207_keep_dims_0"), val = bool(true)]; tensor variance_207_cast_fp16 = reduce_mean(axes = variance_207_axes_0, keep_dims = variance_207_keep_dims_0, x = var_5516_cast_fp16)[name = string("variance_207_cast_fp16")]; fp16 var_5519_to_fp16 = const()[name = string("op_5519_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5520_cast_fp16 = add(x = variance_207_cast_fp16, y = var_5519_to_fp16)[name = string("op_5520_cast_fp16")]; fp32 var_5521_epsilon_0 = const()[name = string("op_5521_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5521_cast_fp16 = rsqrt(epsilon = var_5521_epsilon_0, x = var_5520_cast_fp16)[name = string("op_5521_cast_fp16")]; tensor hidden_states_1189_cast_fp16 = mul(x = hidden_states_1185_cast_fp16, y = var_5521_cast_fp16)[name = string("hidden_states_1189_cast_fp16")]; tensor model_model_layers_25_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_25_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1110328448)))]; tensor input_203_cast_fp16 = mul(x = model_model_layers_25_post_attention_layernorm_weight_to_fp16, y = hidden_states_1189_cast_fp16)[name = string("input_203_cast_fp16")]; tensor model_model_layers_25_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_25_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1110330560)))]; tensor linear_179_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_25_mlp_gate_proj_weight_to_fp16, x = input_203_cast_fp16)[name = string("linear_179_cast_fp16")]; tensor var_5533_cast_fp16 = silu(x = linear_179_cast_fp16)[name = string("op_5533_cast_fp16")]; tensor model_model_layers_25_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_25_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1116622080)))]; tensor linear_180_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_25_mlp_up_proj_weight_to_fp16, x = input_203_cast_fp16)[name = string("linear_180_cast_fp16")]; tensor input_207_cast_fp16 = mul(x = var_5533_cast_fp16, y = linear_180_cast_fp16)[name = string("input_207_cast_fp16")]; tensor model_model_layers_25_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_25_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1122913600)))]; tensor linear_181_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_25_mlp_down_proj_weight_to_fp16, x = input_207_cast_fp16)[name = string("linear_181_cast_fp16")]; tensor hidden_states_1195_cast_fp16 = add(x = hidden_states_1185_cast_fp16, y = linear_181_cast_fp16)[name = string("hidden_states_1195_cast_fp16")]; fp16 var_78_promoted_104_to_fp16 = const()[name = string("op_78_promoted_104_to_fp16"), val = fp16(0x1p+1)]; tensor var_5546_cast_fp16 = pow(x = hidden_states_1195_cast_fp16, y = var_78_promoted_104_to_fp16)[name = string("op_5546_cast_fp16")]; tensor variance_209_axes_0 = const()[name = string("variance_209_axes_0"), val = tensor([-1])]; bool variance_209_keep_dims_0 = const()[name = string("variance_209_keep_dims_0"), val = bool(true)]; tensor variance_209_cast_fp16 = reduce_mean(axes = variance_209_axes_0, keep_dims = variance_209_keep_dims_0, x = var_5546_cast_fp16)[name = string("variance_209_cast_fp16")]; fp16 var_5549_to_fp16 = const()[name = string("op_5549_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5550_cast_fp16 = add(x = variance_209_cast_fp16, y = var_5549_to_fp16)[name = string("op_5550_cast_fp16")]; fp32 var_5551_epsilon_0 = const()[name = string("op_5551_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5551_cast_fp16 = rsqrt(epsilon = var_5551_epsilon_0, x = var_5550_cast_fp16)[name = string("op_5551_cast_fp16")]; tensor hidden_states_1199_cast_fp16 = mul(x = hidden_states_1195_cast_fp16, y = var_5551_cast_fp16)[name = string("hidden_states_1199_cast_fp16")]; tensor model_model_layers_26_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_26_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1129205120)))]; tensor hidden_states_1203_cast_fp16 = mul(x = model_model_layers_26_input_layernorm_weight_to_fp16, y = hidden_states_1199_cast_fp16)[name = string("hidden_states_1203_cast_fp16")]; tensor var_5564_shape_cast_fp16 = shape(x = hidden_states_1203_cast_fp16)[name = string("op_5564_shape_cast_fp16")]; int32 gather_472 = const()[name = string("gather_472"), val = int32(1)]; int32 gather_473_axis_0 = const()[name = string("gather_473_axis_0"), val = int32(0)]; int32 gather_473_batch_dims_0 = const()[name = string("gather_473_batch_dims_0"), val = int32(0)]; bool gather_473_validate_indices_0 = const()[name = string("gather_473_validate_indices_0"), val = bool(false)]; string var_5564_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5564_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_473_to_uint16 = const()[name = string("select_473_to_uint16"), val = uint16(1)]; tensor var_5564_shape_cast_fp16_to_uint16 = cast(dtype = var_5564_shape_cast_fp16_to_uint16_dtype_0, x = var_5564_shape_cast_fp16)[name = string("cast_530")]; uint16 gather_473_cast_uint16 = gather(axis = gather_473_axis_0, batch_dims = gather_473_batch_dims_0, indices = select_473_to_uint16, validate_indices = gather_473_validate_indices_0, x = var_5564_shape_cast_fp16_to_uint16)[name = string("gather_473_cast_uint16")]; string gather_473_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_473_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_26_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_26_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1129207232)))]; tensor linear_182_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_26_self_attn_q_proj_weight_to_fp16, x = hidden_states_1203_cast_fp16)[name = string("linear_182_cast_fp16")]; tensor concat_495x = const()[name = string("concat_495x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_1205_cast_fp16 = reshape(shape = concat_495x, x = linear_182_cast_fp16)[name = string("hidden_states_1205_cast_fp16")]; fp16 var_78_promoted_105_to_fp16 = const()[name = string("op_78_promoted_105_to_fp16"), val = fp16(0x1p+1)]; tensor var_5572_cast_fp16 = pow(x = hidden_states_1205_cast_fp16, y = var_78_promoted_105_to_fp16)[name = string("op_5572_cast_fp16")]; tensor variance_211_axes_0 = const()[name = string("variance_211_axes_0"), val = tensor([-1])]; bool variance_211_keep_dims_0 = const()[name = string("variance_211_keep_dims_0"), val = bool(true)]; tensor variance_211_cast_fp16 = reduce_mean(axes = variance_211_axes_0, keep_dims = variance_211_keep_dims_0, x = var_5572_cast_fp16)[name = string("variance_211_cast_fp16")]; fp16 var_5575_to_fp16 = const()[name = string("op_5575_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5576_cast_fp16 = add(x = variance_211_cast_fp16, y = var_5575_to_fp16)[name = string("op_5576_cast_fp16")]; fp32 var_5577_epsilon_0 = const()[name = string("op_5577_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5577_cast_fp16 = rsqrt(epsilon = var_5577_epsilon_0, x = var_5576_cast_fp16)[name = string("op_5577_cast_fp16")]; tensor hidden_states_1209_cast_fp16 = mul(x = hidden_states_1205_cast_fp16, y = var_5577_cast_fp16)[name = string("hidden_states_1209_cast_fp16")]; tensor model_model_layers_26_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_26_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1133401600)))]; tensor var_5580_cast_fp16 = mul(x = model_model_layers_26_self_attn_q_norm_weight_to_fp16, y = hidden_states_1209_cast_fp16)[name = string("op_5580_cast_fp16")]; tensor q_53_perm_0 = const()[name = string("q_53_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_26_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_26_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1133401920)))]; tensor linear_183_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_26_self_attn_k_proj_weight_to_fp16, x = hidden_states_1203_cast_fp16)[name = string("linear_183_cast_fp16")]; tensor concat_496x = const()[name = string("concat_496x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_1213_cast_fp16 = reshape(shape = concat_496x, x = linear_183_cast_fp16)[name = string("hidden_states_1213_cast_fp16")]; fp16 var_78_promoted_106_to_fp16 = const()[name = string("op_78_promoted_106_to_fp16"), val = fp16(0x1p+1)]; tensor var_5588_cast_fp16 = pow(x = hidden_states_1213_cast_fp16, y = var_78_promoted_106_to_fp16)[name = string("op_5588_cast_fp16")]; tensor variance_213_axes_0 = const()[name = string("variance_213_axes_0"), val = tensor([-1])]; bool variance_213_keep_dims_0 = const()[name = string("variance_213_keep_dims_0"), val = bool(true)]; tensor variance_213_cast_fp16 = reduce_mean(axes = variance_213_axes_0, keep_dims = variance_213_keep_dims_0, x = var_5588_cast_fp16)[name = string("variance_213_cast_fp16")]; fp16 var_5591_to_fp16 = const()[name = string("op_5591_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5592_cast_fp16 = add(x = variance_213_cast_fp16, y = var_5591_to_fp16)[name = string("op_5592_cast_fp16")]; fp32 var_5593_epsilon_0 = const()[name = string("op_5593_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5593_cast_fp16 = rsqrt(epsilon = var_5593_epsilon_0, x = var_5592_cast_fp16)[name = string("op_5593_cast_fp16")]; tensor hidden_states_1217_cast_fp16 = mul(x = hidden_states_1213_cast_fp16, y = var_5593_cast_fp16)[name = string("hidden_states_1217_cast_fp16")]; tensor model_model_layers_26_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_26_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1135499136)))]; tensor var_5596_cast_fp16 = mul(x = model_model_layers_26_self_attn_k_norm_weight_to_fp16, y = hidden_states_1217_cast_fp16)[name = string("op_5596_cast_fp16")]; tensor k_53_perm_0 = const()[name = string("k_53_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_26_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_26_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1135499456)))]; tensor linear_184_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_26_self_attn_v_proj_weight_to_fp16, x = hidden_states_1203_cast_fp16)[name = string("linear_184_cast_fp16")]; tensor concat_497x = const()[name = string("concat_497x"), val = tensor([1, -1, 8, 128])]; tensor var_5601_cast_fp16 = reshape(shape = concat_497x, x = linear_184_cast_fp16)[name = string("op_5601_cast_fp16")]; tensor v_state_53_perm_0 = const()[name = string("v_state_53_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_53_cast_fp16 = transpose(perm = q_53_perm_0, x = var_5580_cast_fp16)[name = string("transpose_7")]; tensor var_5605_cast_fp16 = mul(x = q_53_cast_fp16, y = cos_5_cast_fp16)[name = string("op_5605_cast_fp16")]; tensor x1_105_begin_0 = const()[name = string("x1_105_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_105_end_0 = const()[name = string("x1_105_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_105_end_mask_0 = const()[name = string("x1_105_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_105_cast_fp16 = slice_by_index(begin = x1_105_begin_0, end = x1_105_end_0, end_mask = x1_105_end_mask_0, x = q_53_cast_fp16)[name = string("x1_105_cast_fp16")]; tensor x2_105_begin_0 = const()[name = string("x2_105_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_105_end_0 = const()[name = string("x2_105_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_105_end_mask_0 = const()[name = string("x2_105_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_105_cast_fp16 = slice_by_index(begin = x2_105_begin_0, end = x2_105_end_0, end_mask = x2_105_end_mask_0, x = q_53_cast_fp16)[name = string("x2_105_cast_fp16")]; fp16 const_56_promoted_to_fp16 = const()[name = string("const_56_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5616_cast_fp16 = mul(x = x2_105_cast_fp16, y = const_56_promoted_to_fp16)[name = string("op_5616_cast_fp16")]; bool var_5618_interleave_0 = const()[name = string("op_5618_interleave_0"), val = bool(false)]; tensor var_5618_cast_fp16 = concat(axis = var_72, interleave = var_5618_interleave_0, values = (var_5616_cast_fp16, x1_105_cast_fp16))[name = string("op_5618_cast_fp16")]; tensor var_5619_cast_fp16 = mul(x = var_5618_cast_fp16, y = sin_5_cast_fp16)[name = string("op_5619_cast_fp16")]; tensor query_105_cast_fp16 = add(x = var_5605_cast_fp16, y = var_5619_cast_fp16)[name = string("query_105_cast_fp16")]; tensor k_53_cast_fp16 = transpose(perm = k_53_perm_0, x = var_5596_cast_fp16)[name = string("transpose_6")]; tensor var_5621_cast_fp16 = mul(x = k_53_cast_fp16, y = cos_5_cast_fp16)[name = string("op_5621_cast_fp16")]; tensor x1_107_begin_0 = const()[name = string("x1_107_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_107_end_0 = const()[name = string("x1_107_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_107_end_mask_0 = const()[name = string("x1_107_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_107_cast_fp16 = slice_by_index(begin = x1_107_begin_0, end = x1_107_end_0, end_mask = x1_107_end_mask_0, x = k_53_cast_fp16)[name = string("x1_107_cast_fp16")]; tensor x2_107_begin_0 = const()[name = string("x2_107_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_107_end_0 = const()[name = string("x2_107_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_107_end_mask_0 = const()[name = string("x2_107_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_107_cast_fp16 = slice_by_index(begin = x2_107_begin_0, end = x2_107_end_0, end_mask = x2_107_end_mask_0, x = k_53_cast_fp16)[name = string("x2_107_cast_fp16")]; fp16 const_57_promoted_to_fp16 = const()[name = string("const_57_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5632_cast_fp16 = mul(x = x2_107_cast_fp16, y = const_57_promoted_to_fp16)[name = string("op_5632_cast_fp16")]; bool var_5634_interleave_0 = const()[name = string("op_5634_interleave_0"), val = bool(false)]; tensor var_5634_cast_fp16 = concat(axis = var_72, interleave = var_5634_interleave_0, values = (var_5632_cast_fp16, x1_107_cast_fp16))[name = string("op_5634_cast_fp16")]; tensor var_5635_cast_fp16 = mul(x = var_5634_cast_fp16, y = sin_5_cast_fp16)[name = string("op_5635_cast_fp16")]; tensor k_state_53_cast_fp16 = add(x = var_5621_cast_fp16, y = var_5635_cast_fp16)[name = string("k_state_53_cast_fp16")]; tensor expand_dims_312 = const()[name = string("expand_dims_312"), val = tensor([0])]; tensor expand_dims_313 = const()[name = string("expand_dims_313"), val = tensor([0])]; tensor expand_dims_315 = const()[name = string("expand_dims_315"), val = tensor([0])]; tensor concat_500_values0_0 = const()[name = string("concat_500_values0_0"), val = tensor([26])]; int32 concat_500_axis_0 = const()[name = string("concat_500_axis_0"), val = int32(0)]; bool concat_500_interleave_0 = const()[name = string("concat_500_interleave_0"), val = bool(false)]; tensor concat_500 = concat(axis = concat_500_axis_0, interleave = concat_500_interleave_0, values = (concat_500_values0_0, expand_dims_312, expand_dims_313, expand_dims_2, expand_dims_315))[name = string("concat_500")]; tensor key_cache_internal_tensor_assign_27_stride_0 = const()[name = string("key_cache_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_27_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_27_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_500, begin_mask = key_cache_internal_tensor_assign_27_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_27_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_27_squeeze_mask_0, stride = key_cache_internal_tensor_assign_27_stride_0, update = k_state_53_cast_fp16, x = coreml_update_state_106)[name = string("key_cache_internal_tensor_assign_27_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_27_cast_fp16, input = key_cache)[name = string("coreml_update_state_108_write_state")]; tensor coreml_update_state_108 = read_state(input = key_cache)[name = string("coreml_update_state_108")]; tensor value_cache_internal_tensor_assign_27_stride_0 = const()[name = string("value_cache_internal_tensor_assign_27_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_27_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_27_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_27_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_27_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_27_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_27_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_53_cast_fp16 = transpose(perm = v_state_53_perm_0, x = var_5601_cast_fp16)[name = string("transpose_5")]; tensor value_cache_internal_tensor_assign_27_cast_fp16 = slice_update(begin = concat_500, begin_mask = value_cache_internal_tensor_assign_27_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_27_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_27_squeeze_mask_0, stride = value_cache_internal_tensor_assign_27_stride_0, update = v_state_53_cast_fp16, x = coreml_update_state_107)[name = string("value_cache_internal_tensor_assign_27_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_27_cast_fp16, input = value_cache)[name = string("coreml_update_state_109_write_state")]; tensor coreml_update_state_109 = read_state(input = value_cache)[name = string("coreml_update_state_109")]; tensor var_5658_begin_0 = const()[name = string("op_5658_begin_0"), val = tensor([26, 0, 0, 0, 0])]; tensor var_5658_end_0 = const()[name = string("op_5658_end_0"), val = tensor([27, 1, 8, 2048, 128])]; tensor var_5658_end_mask_0 = const()[name = string("op_5658_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5658_squeeze_mask_0 = const()[name = string("op_5658_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_5658_cast_fp16 = slice_by_index(begin = var_5658_begin_0, end = var_5658_end_0, end_mask = var_5658_end_mask_0, squeeze_mask = var_5658_squeeze_mask_0, x = coreml_update_state_108)[name = string("op_5658_cast_fp16")]; tensor var_5661_begin_0 = const()[name = string("op_5661_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5661_end_mask_0 = const()[name = string("op_5661_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5661_cast_fp16 = slice_by_index(begin = var_5661_begin_0, end = concat_12, end_mask = var_5661_end_mask_0, x = var_5658_cast_fp16)[name = string("op_5661_cast_fp16")]; tensor var_5663_begin_0 = const()[name = string("op_5663_begin_0"), val = tensor([26, 0, 0, 0, 0])]; tensor var_5663_end_0 = const()[name = string("op_5663_end_0"), val = tensor([27, 1, 8, 2048, 128])]; tensor var_5663_end_mask_0 = const()[name = string("op_5663_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5663_squeeze_mask_0 = const()[name = string("op_5663_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_5663_cast_fp16 = slice_by_index(begin = var_5663_begin_0, end = var_5663_end_0, end_mask = var_5663_end_mask_0, squeeze_mask = var_5663_squeeze_mask_0, x = coreml_update_state_109)[name = string("op_5663_cast_fp16")]; tensor var_5666_begin_0 = const()[name = string("op_5666_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5666_end_mask_0 = const()[name = string("op_5666_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5666_cast_fp16 = slice_by_index(begin = var_5666_begin_0, end = concat_12, end_mask = var_5666_end_mask_0, x = var_5663_cast_fp16)[name = string("op_5666_cast_fp16")]; tensor var_5668_shape_cast_fp16 = shape(x = var_5661_cast_fp16)[name = string("op_5668_shape_cast_fp16")]; int32 gather_481 = const()[name = string("gather_481"), val = int32(1)]; int32 gather_482 = const()[name = string("gather_482"), val = int32(8)]; int32 gather_483_axis_0 = const()[name = string("gather_483_axis_0"), val = int32(0)]; int32 gather_483_batch_dims_0 = const()[name = string("gather_483_batch_dims_0"), val = int32(0)]; bool gather_483_validate_indices_0 = const()[name = string("gather_483_validate_indices_0"), val = bool(false)]; string var_5668_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5668_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_483_to_uint16 = const()[name = string("select_483_to_uint16"), val = uint16(2)]; tensor var_5668_shape_cast_fp16_to_uint16 = cast(dtype = var_5668_shape_cast_fp16_to_uint16_dtype_0, x = var_5668_shape_cast_fp16)[name = string("cast_528")]; uint16 gather_483_cast_uint16 = gather(axis = gather_483_axis_0, batch_dims = gather_483_batch_dims_0, indices = select_483_to_uint16, validate_indices = gather_483_validate_indices_0, x = var_5668_shape_cast_fp16_to_uint16)[name = string("gather_483_cast_uint16")]; string gather_483_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_483_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_484 = const()[name = string("gather_484"), val = int32(128)]; tensor var_5675_axes_0 = const()[name = string("op_5675_axes_0"), val = tensor([2])]; tensor var_5675_cast_fp16 = expand_dims(axes = var_5675_axes_0, x = var_5661_cast_fp16)[name = string("op_5675_cast_fp16")]; int32 concat_508_axis_0 = const()[name = string("concat_508_axis_0"), val = int32(0)]; bool concat_508_interleave_0 = const()[name = string("concat_508_interleave_0"), val = bool(false)]; int32 gather_483_cast_uint16_to_int32 = cast(dtype = gather_483_cast_uint16_to_int32_dtype_0, x = gather_483_cast_uint16)[name = string("cast_527")]; tensor concat_508 = concat(axis = concat_508_axis_0, interleave = concat_508_interleave_0, values = (gather_481, gather_482, var_78, gather_483_cast_uint16_to_int32, gather_484))[name = string("concat_508")]; tensor shape_537_cast_fp16 = shape(x = var_5675_cast_fp16)[name = string("shape_537_cast_fp16")]; tensor real_div_52 = real_div(x = concat_508, y = shape_537_cast_fp16)[name = string("real_div_52")]; tensor hidden_states_1223_cast_fp16 = tile(reps = real_div_52, x = var_5675_cast_fp16)[name = string("hidden_states_1223_cast_fp16")]; tensor concat_509x = const()[name = string("concat_509x"), val = tensor([1, 16, -1, 128])]; tensor key_105_cast_fp16 = reshape(shape = concat_509x, x = hidden_states_1223_cast_fp16)[name = string("key_105_cast_fp16")]; tensor var_5685_shape_cast_fp16 = shape(x = var_5666_cast_fp16)[name = string("op_5685_shape_cast_fp16")]; int32 gather_485 = const()[name = string("gather_485"), val = int32(1)]; int32 gather_486 = const()[name = string("gather_486"), val = int32(8)]; int32 gather_487_axis_0 = const()[name = string("gather_487_axis_0"), val = int32(0)]; int32 gather_487_batch_dims_0 = const()[name = string("gather_487_batch_dims_0"), val = int32(0)]; bool gather_487_validate_indices_0 = const()[name = string("gather_487_validate_indices_0"), val = bool(false)]; string var_5685_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5685_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_487_to_uint16 = const()[name = string("select_487_to_uint16"), val = uint16(2)]; tensor var_5685_shape_cast_fp16_to_uint16 = cast(dtype = var_5685_shape_cast_fp16_to_uint16_dtype_0, x = var_5685_shape_cast_fp16)[name = string("cast_526")]; uint16 gather_487_cast_uint16 = gather(axis = gather_487_axis_0, batch_dims = gather_487_batch_dims_0, indices = select_487_to_uint16, validate_indices = gather_487_validate_indices_0, x = var_5685_shape_cast_fp16_to_uint16)[name = string("gather_487_cast_uint16")]; string gather_487_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_487_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_488 = const()[name = string("gather_488"), val = int32(128)]; tensor var_5692_axes_0 = const()[name = string("op_5692_axes_0"), val = tensor([2])]; tensor var_5692_cast_fp16 = expand_dims(axes = var_5692_axes_0, x = var_5666_cast_fp16)[name = string("op_5692_cast_fp16")]; int32 concat_510_axis_0 = const()[name = string("concat_510_axis_0"), val = int32(0)]; bool concat_510_interleave_0 = const()[name = string("concat_510_interleave_0"), val = bool(false)]; int32 gather_487_cast_uint16_to_int32 = cast(dtype = gather_487_cast_uint16_to_int32_dtype_0, x = gather_487_cast_uint16)[name = string("cast_525")]; tensor concat_510 = concat(axis = concat_510_axis_0, interleave = concat_510_interleave_0, values = (gather_485, gather_486, var_78, gather_487_cast_uint16_to_int32, gather_488))[name = string("concat_510")]; tensor shape_542_cast_fp16 = shape(x = var_5692_cast_fp16)[name = string("shape_542_cast_fp16")]; tensor real_div_53 = real_div(x = concat_510, y = shape_542_cast_fp16)[name = string("real_div_53")]; tensor hidden_states_1227_cast_fp16 = tile(reps = real_div_53, x = var_5692_cast_fp16)[name = string("hidden_states_1227_cast_fp16")]; tensor concat_511x = const()[name = string("concat_511x"), val = tensor([1, 16, -1, 128])]; tensor value_105_cast_fp16 = reshape(shape = concat_511x, x = hidden_states_1227_cast_fp16)[name = string("value_105_cast_fp16")]; tensor var_5702_shape_cast_fp16 = shape(x = key_105_cast_fp16)[name = string("op_5702_shape_cast_fp16")]; int32 gather_489_axis_0 = const()[name = string("gather_489_axis_0"), val = int32(0)]; int32 gather_489_batch_dims_0 = const()[name = string("gather_489_batch_dims_0"), val = int32(0)]; bool gather_489_validate_indices_0 = const()[name = string("gather_489_validate_indices_0"), val = bool(false)]; string var_5702_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5702_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_489_to_uint16 = const()[name = string("select_489_to_uint16"), val = uint16(2)]; tensor var_5702_shape_cast_fp16_to_uint16 = cast(dtype = var_5702_shape_cast_fp16_to_uint16_dtype_0, x = var_5702_shape_cast_fp16)[name = string("cast_524")]; uint16 gather_489_cast_uint16 = gather(axis = gather_489_axis_0, batch_dims = gather_489_batch_dims_0, indices = select_489_to_uint16, validate_indices = gather_489_validate_indices_0, x = var_5702_shape_cast_fp16_to_uint16)[name = string("gather_489_cast_uint16")]; string gather_489_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_489_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_512_values0_0 = const()[name = string("concat_512_values0_0"), val = int32(1)]; int32 concat_512_values1_0 = const()[name = string("concat_512_values1_0"), val = int32(1)]; int32 concat_512_values2_0 = const()[name = string("concat_512_values2_0"), val = int32(0)]; int32 concat_512_axis_0 = const()[name = string("concat_512_axis_0"), val = int32(0)]; bool concat_512_interleave_0 = const()[name = string("concat_512_interleave_0"), val = bool(false)]; int32 gather_489_cast_uint16_to_int32 = cast(dtype = gather_489_cast_uint16_to_int32_dtype_0, x = gather_489_cast_uint16)[name = string("cast_523")]; tensor concat_512 = concat(axis = concat_512_axis_0, interleave = concat_512_interleave_0, values = (concat_512_values0_0, concat_512_values1_0, concat_512_values2_0, gather_489_cast_uint16_to_int32))[name = string("concat_512")]; tensor attention_mask_53_begin_0 = const()[name = string("attention_mask_53_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_53_end_mask_0 = const()[name = string("attention_mask_53_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_53_cast_fp16 = slice_by_index(begin = attention_mask_53_begin_0, end = concat_512, end_mask = attention_mask_53_end_mask_0, x = causal_mask)[name = string("attention_mask_53_cast_fp16")]; tensor mul_26_cast_fp16 = mul(x = query_105_cast_fp16, y = var_85_to_fp16)[name = string("mul_26_cast_fp16")]; bool matmul_26_transpose_y_0 = const()[name = string("matmul_26_transpose_y_0"), val = bool(true)]; bool matmul_26_transpose_x_0 = const()[name = string("matmul_26_transpose_x_0"), val = bool(false)]; tensor matmul_26_cast_fp16 = matmul(transpose_x = matmul_26_transpose_x_0, transpose_y = matmul_26_transpose_y_0, x = mul_26_cast_fp16, y = key_105_cast_fp16)[name = string("matmul_26_cast_fp16")]; tensor add_516_cast_fp16 = add(x = matmul_26_cast_fp16, y = attention_mask_53_cast_fp16)[name = string("add_516_cast_fp16")]; int32 softmax_26_axis_0 = const()[name = string("softmax_26_axis_0"), val = int32(-1)]; tensor softmax_26_cast_fp16 = softmax(axis = softmax_26_axis_0, x = add_516_cast_fp16)[name = string("softmax_26_cast_fp16")]; bool attn_output_105_transpose_x_0 = const()[name = string("attn_output_105_transpose_x_0"), val = bool(false)]; bool attn_output_105_transpose_y_0 = const()[name = string("attn_output_105_transpose_y_0"), val = bool(false)]; tensor attn_output_105_cast_fp16 = matmul(transpose_x = attn_output_105_transpose_x_0, transpose_y = attn_output_105_transpose_y_0, x = softmax_26_cast_fp16, y = value_105_cast_fp16)[name = string("attn_output_105_cast_fp16")]; tensor var_5711_perm_0 = const()[name = string("op_5711_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_513_axis_0 = const()[name = string("concat_513_axis_0"), val = int32(0)]; bool concat_513_interleave_0 = const()[name = string("concat_513_interleave_0"), val = bool(false)]; int32 gather_473_cast_uint16_to_int32 = cast(dtype = gather_473_cast_uint16_to_int32_dtype_0, x = gather_473_cast_uint16)[name = string("cast_529")]; tensor concat_513 = concat(axis = concat_513_axis_0, interleave = concat_513_interleave_0, values = (gather_472, gather_473_cast_uint16_to_int32, var_72))[name = string("concat_513")]; tensor var_5711_cast_fp16 = transpose(perm = var_5711_perm_0, x = attn_output_105_cast_fp16)[name = string("transpose_4")]; tensor var_5714_cast_fp16 = reshape(shape = concat_513, x = var_5711_cast_fp16)[name = string("op_5714_cast_fp16")]; tensor model_model_layers_26_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_26_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1137596672)))]; tensor linear_185_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_26_self_attn_o_proj_weight_to_fp16, x = var_5714_cast_fp16)[name = string("linear_185_cast_fp16")]; tensor hidden_states_1231_cast_fp16 = add(x = hidden_states_1195_cast_fp16, y = linear_185_cast_fp16)[name = string("hidden_states_1231_cast_fp16")]; fp16 var_78_promoted_107_to_fp16 = const()[name = string("op_78_promoted_107_to_fp16"), val = fp16(0x1p+1)]; tensor var_5721_cast_fp16 = pow(x = hidden_states_1231_cast_fp16, y = var_78_promoted_107_to_fp16)[name = string("op_5721_cast_fp16")]; tensor variance_215_axes_0 = const()[name = string("variance_215_axes_0"), val = tensor([-1])]; bool variance_215_keep_dims_0 = const()[name = string("variance_215_keep_dims_0"), val = bool(true)]; tensor variance_215_cast_fp16 = reduce_mean(axes = variance_215_axes_0, keep_dims = variance_215_keep_dims_0, x = var_5721_cast_fp16)[name = string("variance_215_cast_fp16")]; fp16 var_5724_to_fp16 = const()[name = string("op_5724_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5725_cast_fp16 = add(x = variance_215_cast_fp16, y = var_5724_to_fp16)[name = string("op_5725_cast_fp16")]; fp32 var_5726_epsilon_0 = const()[name = string("op_5726_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5726_cast_fp16 = rsqrt(epsilon = var_5726_epsilon_0, x = var_5725_cast_fp16)[name = string("op_5726_cast_fp16")]; tensor hidden_states_1235_cast_fp16 = mul(x = hidden_states_1231_cast_fp16, y = var_5726_cast_fp16)[name = string("hidden_states_1235_cast_fp16")]; tensor model_model_layers_26_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_26_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1141791040)))]; tensor input_211_cast_fp16 = mul(x = model_model_layers_26_post_attention_layernorm_weight_to_fp16, y = hidden_states_1235_cast_fp16)[name = string("input_211_cast_fp16")]; tensor model_model_layers_26_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_26_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1141793152)))]; tensor linear_186_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_26_mlp_gate_proj_weight_to_fp16, x = input_211_cast_fp16)[name = string("linear_186_cast_fp16")]; tensor var_5738_cast_fp16 = silu(x = linear_186_cast_fp16)[name = string("op_5738_cast_fp16")]; tensor model_model_layers_26_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_26_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1148084672)))]; tensor linear_187_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_26_mlp_up_proj_weight_to_fp16, x = input_211_cast_fp16)[name = string("linear_187_cast_fp16")]; tensor input_215_cast_fp16 = mul(x = var_5738_cast_fp16, y = linear_187_cast_fp16)[name = string("input_215_cast_fp16")]; tensor model_model_layers_26_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_26_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1154376192)))]; tensor linear_188_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_26_mlp_down_proj_weight_to_fp16, x = input_215_cast_fp16)[name = string("linear_188_cast_fp16")]; tensor hidden_states_1241_cast_fp16 = add(x = hidden_states_1231_cast_fp16, y = linear_188_cast_fp16)[name = string("hidden_states_1241_cast_fp16")]; fp16 var_78_promoted_108_to_fp16 = const()[name = string("op_78_promoted_108_to_fp16"), val = fp16(0x1p+1)]; tensor var_5751_cast_fp16 = pow(x = hidden_states_1241_cast_fp16, y = var_78_promoted_108_to_fp16)[name = string("op_5751_cast_fp16")]; tensor variance_217_axes_0 = const()[name = string("variance_217_axes_0"), val = tensor([-1])]; bool variance_217_keep_dims_0 = const()[name = string("variance_217_keep_dims_0"), val = bool(true)]; tensor variance_217_cast_fp16 = reduce_mean(axes = variance_217_axes_0, keep_dims = variance_217_keep_dims_0, x = var_5751_cast_fp16)[name = string("variance_217_cast_fp16")]; fp16 var_5754_to_fp16 = const()[name = string("op_5754_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5755_cast_fp16 = add(x = variance_217_cast_fp16, y = var_5754_to_fp16)[name = string("op_5755_cast_fp16")]; fp32 var_5756_epsilon_0 = const()[name = string("op_5756_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5756_cast_fp16 = rsqrt(epsilon = var_5756_epsilon_0, x = var_5755_cast_fp16)[name = string("op_5756_cast_fp16")]; tensor hidden_states_1245_cast_fp16 = mul(x = hidden_states_1241_cast_fp16, y = var_5756_cast_fp16)[name = string("hidden_states_1245_cast_fp16")]; tensor model_model_layers_27_input_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_27_input_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1160667712)))]; tensor hidden_states_1249_cast_fp16 = mul(x = model_model_layers_27_input_layernorm_weight_to_fp16, y = hidden_states_1245_cast_fp16)[name = string("hidden_states_1249_cast_fp16")]; tensor var_5769_shape_cast_fp16 = shape(x = hidden_states_1249_cast_fp16)[name = string("op_5769_shape_cast_fp16")]; int32 gather_490 = const()[name = string("gather_490"), val = int32(1)]; int32 gather_491_axis_0 = const()[name = string("gather_491_axis_0"), val = int32(0)]; int32 gather_491_batch_dims_0 = const()[name = string("gather_491_batch_dims_0"), val = int32(0)]; bool gather_491_validate_indices_0 = const()[name = string("gather_491_validate_indices_0"), val = bool(false)]; string var_5769_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5769_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_491_to_uint16 = const()[name = string("select_491_to_uint16"), val = uint16(1)]; tensor var_5769_shape_cast_fp16_to_uint16 = cast(dtype = var_5769_shape_cast_fp16_to_uint16_dtype_0, x = var_5769_shape_cast_fp16)[name = string("cast_522")]; uint16 gather_491_cast_uint16 = gather(axis = gather_491_axis_0, batch_dims = gather_491_batch_dims_0, indices = select_491_to_uint16, validate_indices = gather_491_validate_indices_0, x = var_5769_shape_cast_fp16_to_uint16)[name = string("gather_491_cast_uint16")]; string gather_491_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_491_cast_uint16_to_int32_dtype_0"), val = string("int32")]; tensor model_model_layers_27_self_attn_q_proj_weight_to_fp16 = const()[name = string("model_model_layers_27_self_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1160669824)))]; tensor linear_189_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = model_model_layers_27_self_attn_q_proj_weight_to_fp16, x = hidden_states_1249_cast_fp16)[name = string("linear_189_cast_fp16")]; tensor concat_514x = const()[name = string("concat_514x"), val = tensor([1, -1, 16, 128])]; tensor hidden_states_1251_cast_fp16 = reshape(shape = concat_514x, x = linear_189_cast_fp16)[name = string("hidden_states_1251_cast_fp16")]; fp16 var_78_promoted_109_to_fp16 = const()[name = string("op_78_promoted_109_to_fp16"), val = fp16(0x1p+1)]; tensor var_5777_cast_fp16 = pow(x = hidden_states_1251_cast_fp16, y = var_78_promoted_109_to_fp16)[name = string("op_5777_cast_fp16")]; tensor variance_219_axes_0 = const()[name = string("variance_219_axes_0"), val = tensor([-1])]; bool variance_219_keep_dims_0 = const()[name = string("variance_219_keep_dims_0"), val = bool(true)]; tensor variance_219_cast_fp16 = reduce_mean(axes = variance_219_axes_0, keep_dims = variance_219_keep_dims_0, x = var_5777_cast_fp16)[name = string("variance_219_cast_fp16")]; fp16 var_5780_to_fp16 = const()[name = string("op_5780_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5781_cast_fp16 = add(x = variance_219_cast_fp16, y = var_5780_to_fp16)[name = string("op_5781_cast_fp16")]; fp32 var_5782_epsilon_0 = const()[name = string("op_5782_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5782_cast_fp16 = rsqrt(epsilon = var_5782_epsilon_0, x = var_5781_cast_fp16)[name = string("op_5782_cast_fp16")]; tensor hidden_states_1255_cast_fp16 = mul(x = hidden_states_1251_cast_fp16, y = var_5782_cast_fp16)[name = string("hidden_states_1255_cast_fp16")]; tensor model_model_layers_27_self_attn_q_norm_weight_to_fp16 = const()[name = string("model_model_layers_27_self_attn_q_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1164864192)))]; tensor var_5785_cast_fp16 = mul(x = model_model_layers_27_self_attn_q_norm_weight_to_fp16, y = hidden_states_1255_cast_fp16)[name = string("op_5785_cast_fp16")]; tensor q_perm_0 = const()[name = string("q_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_27_self_attn_k_proj_weight_to_fp16 = const()[name = string("model_model_layers_27_self_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1164864512)))]; tensor linear_190_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_27_self_attn_k_proj_weight_to_fp16, x = hidden_states_1249_cast_fp16)[name = string("linear_190_cast_fp16")]; tensor concat_515x = const()[name = string("concat_515x"), val = tensor([1, -1, 8, 128])]; tensor hidden_states_1259_cast_fp16 = reshape(shape = concat_515x, x = linear_190_cast_fp16)[name = string("hidden_states_1259_cast_fp16")]; fp16 var_78_promoted_110_to_fp16 = const()[name = string("op_78_promoted_110_to_fp16"), val = fp16(0x1p+1)]; tensor var_5793_cast_fp16 = pow(x = hidden_states_1259_cast_fp16, y = var_78_promoted_110_to_fp16)[name = string("op_5793_cast_fp16")]; tensor variance_221_axes_0 = const()[name = string("variance_221_axes_0"), val = tensor([-1])]; bool variance_221_keep_dims_0 = const()[name = string("variance_221_keep_dims_0"), val = bool(true)]; tensor variance_221_cast_fp16 = reduce_mean(axes = variance_221_axes_0, keep_dims = variance_221_keep_dims_0, x = var_5793_cast_fp16)[name = string("variance_221_cast_fp16")]; fp16 var_5796_to_fp16 = const()[name = string("op_5796_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5797_cast_fp16 = add(x = variance_221_cast_fp16, y = var_5796_to_fp16)[name = string("op_5797_cast_fp16")]; fp32 var_5798_epsilon_0 = const()[name = string("op_5798_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5798_cast_fp16 = rsqrt(epsilon = var_5798_epsilon_0, x = var_5797_cast_fp16)[name = string("op_5798_cast_fp16")]; tensor hidden_states_1263_cast_fp16 = mul(x = hidden_states_1259_cast_fp16, y = var_5798_cast_fp16)[name = string("hidden_states_1263_cast_fp16")]; tensor model_model_layers_27_self_attn_k_norm_weight_to_fp16 = const()[name = string("model_model_layers_27_self_attn_k_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1166961728)))]; tensor var_5801_cast_fp16 = mul(x = model_model_layers_27_self_attn_k_norm_weight_to_fp16, y = hidden_states_1263_cast_fp16)[name = string("op_5801_cast_fp16")]; tensor k_perm_0 = const()[name = string("k_perm_0"), val = tensor([0, 2, 1, 3])]; tensor model_model_layers_27_self_attn_v_proj_weight_to_fp16 = const()[name = string("model_model_layers_27_self_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1166962048)))]; tensor linear_191_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_27_self_attn_v_proj_weight_to_fp16, x = hidden_states_1249_cast_fp16)[name = string("linear_191_cast_fp16")]; tensor concat_516x = const()[name = string("concat_516x"), val = tensor([1, -1, 8, 128])]; tensor var_5806_cast_fp16 = reshape(shape = concat_516x, x = linear_191_cast_fp16)[name = string("op_5806_cast_fp16")]; tensor v_state_perm_0 = const()[name = string("v_state_perm_0"), val = tensor([0, 2, 1, 3])]; tensor q_cast_fp16 = transpose(perm = q_perm_0, x = var_5785_cast_fp16)[name = string("transpose_3")]; tensor var_5810_cast_fp16 = mul(x = q_cast_fp16, y = cos_5_cast_fp16)[name = string("op_5810_cast_fp16")]; tensor x1_109_begin_0 = const()[name = string("x1_109_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_109_end_0 = const()[name = string("x1_109_end_0"), val = tensor([1, 16, 0, 64])]; tensor x1_109_end_mask_0 = const()[name = string("x1_109_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_109_cast_fp16 = slice_by_index(begin = x1_109_begin_0, end = x1_109_end_0, end_mask = x1_109_end_mask_0, x = q_cast_fp16)[name = string("x1_109_cast_fp16")]; tensor x2_109_begin_0 = const()[name = string("x2_109_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_109_end_0 = const()[name = string("x2_109_end_0"), val = tensor([1, 16, 0, 128])]; tensor x2_109_end_mask_0 = const()[name = string("x2_109_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_109_cast_fp16 = slice_by_index(begin = x2_109_begin_0, end = x2_109_end_0, end_mask = x2_109_end_mask_0, x = q_cast_fp16)[name = string("x2_109_cast_fp16")]; fp16 const_58_promoted_to_fp16 = const()[name = string("const_58_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5821_cast_fp16 = mul(x = x2_109_cast_fp16, y = const_58_promoted_to_fp16)[name = string("op_5821_cast_fp16")]; bool var_5823_interleave_0 = const()[name = string("op_5823_interleave_0"), val = bool(false)]; tensor var_5823_cast_fp16 = concat(axis = var_72, interleave = var_5823_interleave_0, values = (var_5821_cast_fp16, x1_109_cast_fp16))[name = string("op_5823_cast_fp16")]; tensor var_5824_cast_fp16 = mul(x = var_5823_cast_fp16, y = sin_5_cast_fp16)[name = string("op_5824_cast_fp16")]; tensor query_109_cast_fp16 = add(x = var_5810_cast_fp16, y = var_5824_cast_fp16)[name = string("query_109_cast_fp16")]; tensor k_cast_fp16 = transpose(perm = k_perm_0, x = var_5801_cast_fp16)[name = string("transpose_2")]; tensor var_5826_cast_fp16 = mul(x = k_cast_fp16, y = cos_5_cast_fp16)[name = string("op_5826_cast_fp16")]; tensor x1_begin_0 = const()[name = string("x1_begin_0"), val = tensor([0, 0, 0, 0])]; tensor x1_end_0 = const()[name = string("x1_end_0"), val = tensor([1, 8, 0, 64])]; tensor x1_end_mask_0 = const()[name = string("x1_end_mask_0"), val = tensor([true, true, true, false])]; tensor x1_cast_fp16 = slice_by_index(begin = x1_begin_0, end = x1_end_0, end_mask = x1_end_mask_0, x = k_cast_fp16)[name = string("x1_cast_fp16")]; tensor x2_begin_0 = const()[name = string("x2_begin_0"), val = tensor([0, 0, 0, 64])]; tensor x2_end_0 = const()[name = string("x2_end_0"), val = tensor([1, 8, 0, 128])]; tensor x2_end_mask_0 = const()[name = string("x2_end_mask_0"), val = tensor([true, true, true, true])]; tensor x2_cast_fp16 = slice_by_index(begin = x2_begin_0, end = x2_end_0, end_mask = x2_end_mask_0, x = k_cast_fp16)[name = string("x2_cast_fp16")]; fp16 const_59_promoted_to_fp16 = const()[name = string("const_59_promoted_to_fp16"), val = fp16(-0x1p+0)]; tensor var_5837_cast_fp16 = mul(x = x2_cast_fp16, y = const_59_promoted_to_fp16)[name = string("op_5837_cast_fp16")]; bool var_5839_interleave_0 = const()[name = string("op_5839_interleave_0"), val = bool(false)]; tensor var_5839_cast_fp16 = concat(axis = var_72, interleave = var_5839_interleave_0, values = (var_5837_cast_fp16, x1_cast_fp16))[name = string("op_5839_cast_fp16")]; tensor var_5840_cast_fp16 = mul(x = var_5839_cast_fp16, y = sin_5_cast_fp16)[name = string("op_5840_cast_fp16")]; tensor k_state_cast_fp16 = add(x = var_5826_cast_fp16, y = var_5840_cast_fp16)[name = string("k_state_cast_fp16")]; tensor expand_dims_324 = const()[name = string("expand_dims_324"), val = tensor([0])]; tensor expand_dims_325 = const()[name = string("expand_dims_325"), val = tensor([0])]; tensor expand_dims_327 = const()[name = string("expand_dims_327"), val = tensor([0])]; tensor concat_519_values0_0 = const()[name = string("concat_519_values0_0"), val = tensor([27])]; int32 concat_519_axis_0 = const()[name = string("concat_519_axis_0"), val = int32(0)]; bool concat_519_interleave_0 = const()[name = string("concat_519_interleave_0"), val = bool(false)]; tensor concat_519 = concat(axis = concat_519_axis_0, interleave = concat_519_interleave_0, values = (concat_519_values0_0, expand_dims_324, expand_dims_325, expand_dims_2, expand_dims_327))[name = string("concat_519")]; tensor key_cache_internal_tensor_assign_28_stride_0 = const()[name = string("key_cache_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor key_cache_internal_tensor_assign_28_begin_mask_0 = const()[name = string("key_cache_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor key_cache_internal_tensor_assign_28_end_mask_0 = const()[name = string("key_cache_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor key_cache_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("key_cache_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor key_cache_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_519, begin_mask = key_cache_internal_tensor_assign_28_begin_mask_0, end = concat_7, end_mask = key_cache_internal_tensor_assign_28_end_mask_0, squeeze_mask = key_cache_internal_tensor_assign_28_squeeze_mask_0, stride = key_cache_internal_tensor_assign_28_stride_0, update = k_state_cast_fp16, x = coreml_update_state_108)[name = string("key_cache_internal_tensor_assign_28_cast_fp16")]; write_state(data = key_cache_internal_tensor_assign_28_cast_fp16, input = key_cache)[name = string("coreml_update_state_110_write_state")]; tensor coreml_update_state_110 = read_state(input = key_cache)[name = string("coreml_update_state_110")]; tensor value_cache_internal_tensor_assign_28_stride_0 = const()[name = string("value_cache_internal_tensor_assign_28_stride_0"), val = tensor([1, 1, 1, 1, 1])]; tensor value_cache_internal_tensor_assign_28_begin_mask_0 = const()[name = string("value_cache_internal_tensor_assign_28_begin_mask_0"), val = tensor([false, false, false, false, false])]; tensor value_cache_internal_tensor_assign_28_end_mask_0 = const()[name = string("value_cache_internal_tensor_assign_28_end_mask_0"), val = tensor([false, true, false, false, true])]; tensor value_cache_internal_tensor_assign_28_squeeze_mask_0 = const()[name = string("value_cache_internal_tensor_assign_28_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor v_state_cast_fp16 = transpose(perm = v_state_perm_0, x = var_5806_cast_fp16)[name = string("transpose_1")]; tensor value_cache_internal_tensor_assign_28_cast_fp16 = slice_update(begin = concat_519, begin_mask = value_cache_internal_tensor_assign_28_begin_mask_0, end = concat_7, end_mask = value_cache_internal_tensor_assign_28_end_mask_0, squeeze_mask = value_cache_internal_tensor_assign_28_squeeze_mask_0, stride = value_cache_internal_tensor_assign_28_stride_0, update = v_state_cast_fp16, x = coreml_update_state_109)[name = string("value_cache_internal_tensor_assign_28_cast_fp16")]; write_state(data = value_cache_internal_tensor_assign_28_cast_fp16, input = value_cache)[name = string("coreml_update_state_111_write_state")]; tensor coreml_update_state_111 = read_state(input = value_cache)[name = string("coreml_update_state_111")]; tensor var_5863_begin_0 = const()[name = string("op_5863_begin_0"), val = tensor([27, 0, 0, 0, 0])]; tensor var_5863_end_0 = const()[name = string("op_5863_end_0"), val = tensor([28, 1, 8, 2048, 128])]; tensor var_5863_end_mask_0 = const()[name = string("op_5863_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5863_squeeze_mask_0 = const()[name = string("op_5863_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_5863_cast_fp16 = slice_by_index(begin = var_5863_begin_0, end = var_5863_end_0, end_mask = var_5863_end_mask_0, squeeze_mask = var_5863_squeeze_mask_0, x = coreml_update_state_110)[name = string("op_5863_cast_fp16")]; tensor var_5866_begin_0 = const()[name = string("op_5866_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5866_end_mask_0 = const()[name = string("op_5866_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5866_cast_fp16 = slice_by_index(begin = var_5866_begin_0, end = concat_12, end_mask = var_5866_end_mask_0, x = var_5863_cast_fp16)[name = string("op_5866_cast_fp16")]; tensor var_5868_begin_0 = const()[name = string("op_5868_begin_0"), val = tensor([27, 0, 0, 0, 0])]; tensor var_5868_end_0 = const()[name = string("op_5868_end_0"), val = tensor([28, 1, 8, 2048, 128])]; tensor var_5868_end_mask_0 = const()[name = string("op_5868_end_mask_0"), val = tensor([false, true, true, true, true])]; tensor var_5868_squeeze_mask_0 = const()[name = string("op_5868_squeeze_mask_0"), val = tensor([true, false, false, false, false])]; tensor var_5868_cast_fp16 = slice_by_index(begin = var_5868_begin_0, end = var_5868_end_0, end_mask = var_5868_end_mask_0, squeeze_mask = var_5868_squeeze_mask_0, x = coreml_update_state_111)[name = string("op_5868_cast_fp16")]; tensor var_5871_begin_0 = const()[name = string("op_5871_begin_0"), val = tensor([0, 0, 0, 0])]; tensor var_5871_end_mask_0 = const()[name = string("op_5871_end_mask_0"), val = tensor([true, true, false, true])]; tensor var_5871_cast_fp16 = slice_by_index(begin = var_5871_begin_0, end = concat_12, end_mask = var_5871_end_mask_0, x = var_5868_cast_fp16)[name = string("op_5871_cast_fp16")]; tensor var_5873_shape_cast_fp16 = shape(x = var_5866_cast_fp16)[name = string("op_5873_shape_cast_fp16")]; int32 gather_499 = const()[name = string("gather_499"), val = int32(1)]; int32 gather_500 = const()[name = string("gather_500"), val = int32(8)]; int32 gather_501_axis_0 = const()[name = string("gather_501_axis_0"), val = int32(0)]; int32 gather_501_batch_dims_0 = const()[name = string("gather_501_batch_dims_0"), val = int32(0)]; bool gather_501_validate_indices_0 = const()[name = string("gather_501_validate_indices_0"), val = bool(false)]; string var_5873_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5873_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_501_to_uint16 = const()[name = string("select_501_to_uint16"), val = uint16(2)]; tensor var_5873_shape_cast_fp16_to_uint16 = cast(dtype = var_5873_shape_cast_fp16_to_uint16_dtype_0, x = var_5873_shape_cast_fp16)[name = string("cast_520")]; uint16 gather_501_cast_uint16 = gather(axis = gather_501_axis_0, batch_dims = gather_501_batch_dims_0, indices = select_501_to_uint16, validate_indices = gather_501_validate_indices_0, x = var_5873_shape_cast_fp16_to_uint16)[name = string("gather_501_cast_uint16")]; string gather_501_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_501_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_502 = const()[name = string("gather_502"), val = int32(128)]; tensor var_5880_axes_0 = const()[name = string("op_5880_axes_0"), val = tensor([2])]; tensor var_5880_cast_fp16 = expand_dims(axes = var_5880_axes_0, x = var_5866_cast_fp16)[name = string("op_5880_cast_fp16")]; int32 concat_527_axis_0 = const()[name = string("concat_527_axis_0"), val = int32(0)]; bool concat_527_interleave_0 = const()[name = string("concat_527_interleave_0"), val = bool(false)]; int32 gather_501_cast_uint16_to_int32 = cast(dtype = gather_501_cast_uint16_to_int32_dtype_0, x = gather_501_cast_uint16)[name = string("cast_519")]; tensor concat_527 = concat(axis = concat_527_axis_0, interleave = concat_527_interleave_0, values = (gather_499, gather_500, var_78, gather_501_cast_uint16_to_int32, gather_502))[name = string("concat_527")]; tensor shape_557_cast_fp16 = shape(x = var_5880_cast_fp16)[name = string("shape_557_cast_fp16")]; tensor real_div_54 = real_div(x = concat_527, y = shape_557_cast_fp16)[name = string("real_div_54")]; tensor hidden_states_1269_cast_fp16 = tile(reps = real_div_54, x = var_5880_cast_fp16)[name = string("hidden_states_1269_cast_fp16")]; tensor concat_528x = const()[name = string("concat_528x"), val = tensor([1, 16, -1, 128])]; tensor key_109_cast_fp16 = reshape(shape = concat_528x, x = hidden_states_1269_cast_fp16)[name = string("key_109_cast_fp16")]; tensor var_5890_shape_cast_fp16 = shape(x = var_5871_cast_fp16)[name = string("op_5890_shape_cast_fp16")]; int32 gather_503 = const()[name = string("gather_503"), val = int32(1)]; int32 gather_504 = const()[name = string("gather_504"), val = int32(8)]; int32 gather_505_axis_0 = const()[name = string("gather_505_axis_0"), val = int32(0)]; int32 gather_505_batch_dims_0 = const()[name = string("gather_505_batch_dims_0"), val = int32(0)]; bool gather_505_validate_indices_0 = const()[name = string("gather_505_validate_indices_0"), val = bool(false)]; string var_5890_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5890_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_505_to_uint16 = const()[name = string("select_505_to_uint16"), val = uint16(2)]; tensor var_5890_shape_cast_fp16_to_uint16 = cast(dtype = var_5890_shape_cast_fp16_to_uint16_dtype_0, x = var_5890_shape_cast_fp16)[name = string("cast_518")]; uint16 gather_505_cast_uint16 = gather(axis = gather_505_axis_0, batch_dims = gather_505_batch_dims_0, indices = select_505_to_uint16, validate_indices = gather_505_validate_indices_0, x = var_5890_shape_cast_fp16_to_uint16)[name = string("gather_505_cast_uint16")]; string gather_505_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_505_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 gather_506 = const()[name = string("gather_506"), val = int32(128)]; tensor var_5897_axes_0 = const()[name = string("op_5897_axes_0"), val = tensor([2])]; tensor var_5897_cast_fp16 = expand_dims(axes = var_5897_axes_0, x = var_5871_cast_fp16)[name = string("op_5897_cast_fp16")]; int32 concat_529_axis_0 = const()[name = string("concat_529_axis_0"), val = int32(0)]; bool concat_529_interleave_0 = const()[name = string("concat_529_interleave_0"), val = bool(false)]; int32 gather_505_cast_uint16_to_int32 = cast(dtype = gather_505_cast_uint16_to_int32_dtype_0, x = gather_505_cast_uint16)[name = string("cast_517")]; tensor concat_529 = concat(axis = concat_529_axis_0, interleave = concat_529_interleave_0, values = (gather_503, gather_504, var_78, gather_505_cast_uint16_to_int32, gather_506))[name = string("concat_529")]; tensor shape_562_cast_fp16 = shape(x = var_5897_cast_fp16)[name = string("shape_562_cast_fp16")]; tensor real_div_55 = real_div(x = concat_529, y = shape_562_cast_fp16)[name = string("real_div_55")]; tensor hidden_states_1273_cast_fp16 = tile(reps = real_div_55, x = var_5897_cast_fp16)[name = string("hidden_states_1273_cast_fp16")]; tensor concat_530x = const()[name = string("concat_530x"), val = tensor([1, 16, -1, 128])]; tensor value_109_cast_fp16 = reshape(shape = concat_530x, x = hidden_states_1273_cast_fp16)[name = string("value_109_cast_fp16")]; tensor var_5907_shape_cast_fp16 = shape(x = key_109_cast_fp16)[name = string("op_5907_shape_cast_fp16")]; int32 gather_507_axis_0 = const()[name = string("gather_507_axis_0"), val = int32(0)]; int32 gather_507_batch_dims_0 = const()[name = string("gather_507_batch_dims_0"), val = int32(0)]; bool gather_507_validate_indices_0 = const()[name = string("gather_507_validate_indices_0"), val = bool(false)]; string var_5907_shape_cast_fp16_to_uint16_dtype_0 = const()[name = string("op_5907_shape_cast_fp16_to_uint16_dtype_0"), val = string("uint16")]; uint16 select_507_to_uint16 = const()[name = string("select_507_to_uint16"), val = uint16(2)]; tensor var_5907_shape_cast_fp16_to_uint16 = cast(dtype = var_5907_shape_cast_fp16_to_uint16_dtype_0, x = var_5907_shape_cast_fp16)[name = string("cast_516")]; uint16 gather_507_cast_uint16 = gather(axis = gather_507_axis_0, batch_dims = gather_507_batch_dims_0, indices = select_507_to_uint16, validate_indices = gather_507_validate_indices_0, x = var_5907_shape_cast_fp16_to_uint16)[name = string("gather_507_cast_uint16")]; string gather_507_cast_uint16_to_int32_dtype_0 = const()[name = string("gather_507_cast_uint16_to_int32_dtype_0"), val = string("int32")]; int32 concat_531_values0_0 = const()[name = string("concat_531_values0_0"), val = int32(1)]; int32 concat_531_values1_0 = const()[name = string("concat_531_values1_0"), val = int32(1)]; int32 concat_531_values2_0 = const()[name = string("concat_531_values2_0"), val = int32(0)]; int32 concat_531_axis_0 = const()[name = string("concat_531_axis_0"), val = int32(0)]; bool concat_531_interleave_0 = const()[name = string("concat_531_interleave_0"), val = bool(false)]; int32 gather_507_cast_uint16_to_int32 = cast(dtype = gather_507_cast_uint16_to_int32_dtype_0, x = gather_507_cast_uint16)[name = string("cast_515")]; tensor concat_531 = concat(axis = concat_531_axis_0, interleave = concat_531_interleave_0, values = (concat_531_values0_0, concat_531_values1_0, concat_531_values2_0, gather_507_cast_uint16_to_int32))[name = string("concat_531")]; tensor attention_mask_begin_0 = const()[name = string("attention_mask_begin_0"), val = tensor([0, 0, 0, 0])]; tensor attention_mask_end_mask_0 = const()[name = string("attention_mask_end_mask_0"), val = tensor([true, true, true, false])]; tensor attention_mask_cast_fp16 = slice_by_index(begin = attention_mask_begin_0, end = concat_531, end_mask = attention_mask_end_mask_0, x = causal_mask)[name = string("attention_mask_cast_fp16")]; tensor mul_27_cast_fp16 = mul(x = query_109_cast_fp16, y = var_85_to_fp16)[name = string("mul_27_cast_fp16")]; bool matmul_27_transpose_y_0 = const()[name = string("matmul_27_transpose_y_0"), val = bool(true)]; bool matmul_27_transpose_x_0 = const()[name = string("matmul_27_transpose_x_0"), val = bool(false)]; tensor matmul_27_cast_fp16 = matmul(transpose_x = matmul_27_transpose_x_0, transpose_y = matmul_27_transpose_y_0, x = mul_27_cast_fp16, y = key_109_cast_fp16)[name = string("matmul_27_cast_fp16")]; tensor add_535_cast_fp16 = add(x = matmul_27_cast_fp16, y = attention_mask_cast_fp16)[name = string("add_535_cast_fp16")]; int32 softmax_27_axis_0 = const()[name = string("softmax_27_axis_0"), val = int32(-1)]; tensor softmax_27_cast_fp16 = softmax(axis = softmax_27_axis_0, x = add_535_cast_fp16)[name = string("softmax_27_cast_fp16")]; bool attn_output_109_transpose_x_0 = const()[name = string("attn_output_109_transpose_x_0"), val = bool(false)]; bool attn_output_109_transpose_y_0 = const()[name = string("attn_output_109_transpose_y_0"), val = bool(false)]; tensor attn_output_109_cast_fp16 = matmul(transpose_x = attn_output_109_transpose_x_0, transpose_y = attn_output_109_transpose_y_0, x = softmax_27_cast_fp16, y = value_109_cast_fp16)[name = string("attn_output_109_cast_fp16")]; tensor var_5916_perm_0 = const()[name = string("op_5916_perm_0"), val = tensor([0, 2, 1, 3])]; int32 concat_532_axis_0 = const()[name = string("concat_532_axis_0"), val = int32(0)]; bool concat_532_interleave_0 = const()[name = string("concat_532_interleave_0"), val = bool(false)]; int32 gather_491_cast_uint16_to_int32 = cast(dtype = gather_491_cast_uint16_to_int32_dtype_0, x = gather_491_cast_uint16)[name = string("cast_521")]; tensor concat_532 = concat(axis = concat_532_axis_0, interleave = concat_532_interleave_0, values = (gather_490, gather_491_cast_uint16_to_int32, var_72))[name = string("concat_532")]; tensor var_5916_cast_fp16 = transpose(perm = var_5916_perm_0, x = attn_output_109_cast_fp16)[name = string("transpose_0")]; tensor var_5919_cast_fp16 = reshape(shape = concat_532, x = var_5916_cast_fp16)[name = string("op_5919_cast_fp16")]; tensor model_model_layers_27_self_attn_o_proj_weight_to_fp16 = const()[name = string("model_model_layers_27_self_attn_o_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1169059264)))]; tensor linear_192_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_27_self_attn_o_proj_weight_to_fp16, x = var_5919_cast_fp16)[name = string("linear_192_cast_fp16")]; tensor hidden_states_1277_cast_fp16 = add(x = hidden_states_1241_cast_fp16, y = linear_192_cast_fp16)[name = string("hidden_states_1277_cast_fp16")]; fp16 var_78_promoted_111_to_fp16 = const()[name = string("op_78_promoted_111_to_fp16"), val = fp16(0x1p+1)]; tensor var_5926_cast_fp16 = pow(x = hidden_states_1277_cast_fp16, y = var_78_promoted_111_to_fp16)[name = string("op_5926_cast_fp16")]; tensor variance_223_axes_0 = const()[name = string("variance_223_axes_0"), val = tensor([-1])]; bool variance_223_keep_dims_0 = const()[name = string("variance_223_keep_dims_0"), val = bool(true)]; tensor variance_223_cast_fp16 = reduce_mean(axes = variance_223_axes_0, keep_dims = variance_223_keep_dims_0, x = var_5926_cast_fp16)[name = string("variance_223_cast_fp16")]; fp16 var_5929_to_fp16 = const()[name = string("op_5929_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5930_cast_fp16 = add(x = variance_223_cast_fp16, y = var_5929_to_fp16)[name = string("op_5930_cast_fp16")]; fp32 var_5931_epsilon_0 = const()[name = string("op_5931_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5931_cast_fp16 = rsqrt(epsilon = var_5931_epsilon_0, x = var_5930_cast_fp16)[name = string("op_5931_cast_fp16")]; tensor hidden_states_1281_cast_fp16 = mul(x = hidden_states_1277_cast_fp16, y = var_5931_cast_fp16)[name = string("hidden_states_1281_cast_fp16")]; tensor model_model_layers_27_post_attention_layernorm_weight_to_fp16 = const()[name = string("model_model_layers_27_post_attention_layernorm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1173253632)))]; tensor input_219_cast_fp16 = mul(x = model_model_layers_27_post_attention_layernorm_weight_to_fp16, y = hidden_states_1281_cast_fp16)[name = string("input_219_cast_fp16")]; tensor model_model_layers_27_mlp_gate_proj_weight_to_fp16 = const()[name = string("model_model_layers_27_mlp_gate_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1173255744)))]; tensor linear_193_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_27_mlp_gate_proj_weight_to_fp16, x = input_219_cast_fp16)[name = string("linear_193_cast_fp16")]; tensor var_5943_cast_fp16 = silu(x = linear_193_cast_fp16)[name = string("op_5943_cast_fp16")]; tensor model_model_layers_27_mlp_up_proj_weight_to_fp16 = const()[name = string("model_model_layers_27_mlp_up_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1179547264)))]; tensor linear_194_cast_fp16 = linear(bias = linear_4_bias_0_to_fp16, weight = model_model_layers_27_mlp_up_proj_weight_to_fp16, x = input_219_cast_fp16)[name = string("linear_194_cast_fp16")]; tensor input_223_cast_fp16 = mul(x = var_5943_cast_fp16, y = linear_194_cast_fp16)[name = string("input_223_cast_fp16")]; tensor model_model_layers_27_mlp_down_proj_weight_to_fp16 = const()[name = string("model_model_layers_27_mlp_down_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1185838784)))]; tensor linear_195_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = model_model_layers_27_mlp_down_proj_weight_to_fp16, x = input_223_cast_fp16)[name = string("linear_195_cast_fp16")]; tensor hidden_states_1287_cast_fp16 = add(x = hidden_states_1277_cast_fp16, y = linear_195_cast_fp16)[name = string("hidden_states_1287_cast_fp16")]; fp16 var_78_promoted_112_to_fp16 = const()[name = string("op_78_promoted_112_to_fp16"), val = fp16(0x1p+1)]; tensor var_5952_cast_fp16 = pow(x = hidden_states_1287_cast_fp16, y = var_78_promoted_112_to_fp16)[name = string("op_5952_cast_fp16")]; tensor variance_axes_0 = const()[name = string("variance_axes_0"), val = tensor([-1])]; bool variance_keep_dims_0 = const()[name = string("variance_keep_dims_0"), val = bool(true)]; tensor variance_cast_fp16 = reduce_mean(axes = variance_axes_0, keep_dims = variance_keep_dims_0, x = var_5952_cast_fp16)[name = string("variance_cast_fp16")]; fp16 var_5955_to_fp16 = const()[name = string("op_5955_to_fp16"), val = fp16(0x1.1p-20)]; tensor var_5956_cast_fp16 = add(x = variance_cast_fp16, y = var_5955_to_fp16)[name = string("op_5956_cast_fp16")]; fp32 var_5957_epsilon_0 = const()[name = string("op_5957_epsilon_0"), val = fp32(0x1.197998p-40)]; tensor var_5957_cast_fp16 = rsqrt(epsilon = var_5957_epsilon_0, x = var_5956_cast_fp16)[name = string("op_5957_cast_fp16")]; tensor hidden_states_1291_cast_fp16 = mul(x = hidden_states_1287_cast_fp16, y = var_5957_cast_fp16)[name = string("hidden_states_1291_cast_fp16")]; tensor model_model_norm_weight_to_fp16 = const()[name = string("model_model_norm_weight_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1192130304)))]; tensor hidden_states_cast_fp16 = mul(x = model_model_norm_weight_to_fp16, y = hidden_states_1291_cast_fp16)[name = string("hidden_states_cast_fp16")]; tensor linear_196_bias_0_to_fp16 = const()[name = string("linear_196_bias_0_to_fp16"), val = tensor(BLOBFILE(path = string("@model_path/weights/weight.bin"), offset = uint64(1192132416)))]; tensor logits = linear(bias = linear_196_bias_0_to_fp16, weight = model_model_embed_tokens_weight_to_fp16, x = hidden_states_cast_fp16)[name = string("linear_196_cast_fp16")]; } -> (logits); }