dacorvo HF Staff commited on 3 days ago

Commit

71993be

verified ·

1 Parent(s): 9b0c04c

Synchronizing local compiler cache.

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +19 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-0.6B/11315149f2fec5f9b1e3.json +87 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/040b1e23663eba2981b3.json +87 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/6f5f2cf26c21b525ca82.json +87 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/8f2f6fc022fb92a08835.json +87 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/922b8f110b9e3fdaa766.json +87 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-8B/3c013915c647bb8e6712.json +95 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-8B/b60a4e37d9a2dbabe961.json +95 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_06cc3d220bd81f31483c+fb4cc044/compile_flags.json +1 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_06cc3d220bd81f31483c+fb4cc044/model.done +0 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_06cc3d220bd81f31483c+fb4cc044/model.hlo_module.pb +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_06cc3d220bd81f31483c+fb4cc044/model.neff +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.hlo_module.pb +1 -1
neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.neff +1 -1
neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.neff +1 -1
neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/wrapped_neff.hlo +1 -1
neuronxcc-2.21.18209.0+043b1bf7/MODULE_315d9491c827c077575a+fb4cc044/compile_flags.json +1 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_315d9491c827c077575a+fb4cc044/model.hlo_module.pb +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_315d9491c827c077575a+fb4cc044/model.log +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_3321b4e97123c041192d+fb4cc044/compile_flags.json +1 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_3321b4e97123c041192d+fb4cc044/model.done +0 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_3321b4e97123c041192d+fb4cc044/model.hlo_module.pb +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_3321b4e97123c041192d+fb4cc044/model.neff +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.neff +1 -1
neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/wrapped_neff.hlo +1 -1
neuronxcc-2.21.18209.0+043b1bf7/MODULE_394461f5b793e1c6ddbd+fb4cc044/compile_flags.json +1 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_394461f5b793e1c6ddbd+fb4cc044/model.done +0 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_394461f5b793e1c6ddbd+fb4cc044/model.hlo_module.pb +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_394461f5b793e1c6ddbd+fb4cc044/model.neff +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.neff +1 -1
neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/wrapped_neff.hlo +1 -1
neuronxcc-2.21.18209.0+043b1bf7/MODULE_45c1a0c25eab451751df+fb4cc044/compile_flags.json +1 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_45c1a0c25eab451751df+fb4cc044/model.done +0 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_45c1a0c25eab451751df+fb4cc044/model.hlo_module.pb +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_45c1a0c25eab451751df+fb4cc044/model.neff +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_574e9a764503b48fbba9+24129607/model.hlo_module.pb +1 -1
neuronxcc-2.21.18209.0+043b1bf7/MODULE_574e9a764503b48fbba9+24129607/model.neff +1 -1
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5988b4b9e2fe27c7105c+fb4cc044/compile_flags.json +1 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5988b4b9e2fe27c7105c+fb4cc044/model.done +0 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5988b4b9e2fe27c7105c+fb4cc044/model.hlo_module.pb +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5988b4b9e2fe27c7105c+fb4cc044/model.neff +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.neff +1 -1
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/wrapped_neff.hlo +1 -1
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.hlo_module.pb +1 -1
neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.neff +1 -1
neuronxcc-2.21.18209.0+043b1bf7/MODULE_616029436a1201c3f014+fb4cc044/compile_flags.json +1 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_616029436a1201c3f014+fb4cc044/model.done +0 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_616029436a1201c3f014+fb4cc044/model.hlo_module.pb +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_616029436a1201c3f014+fb4cc044/model.neff +3 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b37881de0fe95bd8128+24129607/compile_flags.json +1 -0

.gitattributes CHANGED Viewed

@@ -5466,3 +5466,22 @@ neuronxcc-2.21.18209.0+043b1bf7/MODULE_b3156313afcf6f9c1c2b+24129607/model.neff
 neuronxcc-2.21.18209.0+043b1bf7/MODULE_ea6d0b2d03e12e6e6709+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.18209.0+043b1bf7/MODULE_f668e8059b4ad00c098b+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.18209.0+043b1bf7/MODULE_f668e8059b4ad00c098b+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text

 neuronxcc-2.21.18209.0+043b1bf7/MODULE_ea6d0b2d03e12e6e6709+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.18209.0+043b1bf7/MODULE_f668e8059b4ad00c098b+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.21.18209.0+043b1bf7/MODULE_f668e8059b4ad00c098b+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_06cc3d220bd81f31483c+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_3321b4e97123c041192d+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_394461f5b793e1c6ddbd+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_45c1a0c25eab451751df+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_5988b4b9e2fe27c7105c+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_616029436a1201c3f014+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b37881de0fe95bd8128+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_823cb759235f53a8fe88+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_a0387fe4d05bf37e345a+a02c3a36/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_a0387fe4d05bf37e345a+a02c3a36/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_af5ca2287a6b0d70aac7+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_bd3408e2c963ac7da056+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_c345ba838e8c529a43e2+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_cb4d848c61c58d4a5351+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_ce7eb4c5ef357b61cc09+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_da126386d4d4d4a5bf4f+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_dd4aa1e96fd3b31b162d+24129607/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_de1482f217f05f48d1dd+fb4cc044/model.neff filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.21.18209.0+043b1bf7/MODULE_f4bed0e6c1426c4b9bc7+24129607/model.neff filter=lfs diff=lfs merge=lfs -text

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-0.6B/11315149f2fec5f9b1e3.json ADDED Viewed

	@@ -0,0 +1,87 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-0.6B",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-0.6B",
+    "checkpoint_revision": "c1899de289a04d12100db370d81485cdf75e47ca",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 1,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.2.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/040b1e23663eba2981b3.json ADDED Viewed

	@@ -0,0 +1,87 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-0.6B",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 6,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
+    "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 24,
+    "max_batch_size": 6,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.2.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 24
+  },
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151669
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/6f5f2cf26c21b525ca82.json ADDED Viewed

	@@ -0,0 +1,87 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-0.6B",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
+    "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 1,
+    "max_batch_size": 1,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.2.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 1
+  },
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151669
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/8f2f6fc022fb92a08835.json ADDED Viewed

	@@ -0,0 +1,87 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-0.6B",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
+    "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 24,
+    "max_batch_size": 4,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.2.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 24
+  },
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151669
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-0.6B/922b8f110b9e3fdaa766.json ADDED Viewed

	@@ -0,0 +1,87 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-0.6B",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-0.6B",
+    "checkpoint_revision": "c54f2e6e80b2d7b7de06f51cec4959f6b3e03418",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 24,
+    "max_batch_size": 1,
+    "max_context_length": 1024,
+    "max_topk": 256,
+    "n_active_tokens": 1024,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.2.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 1024,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 24
+  },
+  "num_attention_heads": 16,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151669
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-8B/3c013915c647bb8e6712.json ADDED Viewed

	@@ -0,0 +1,95 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 1,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.2.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.2.dev0/qwen3/Qwen/Qwen3-Embedding-8B/b60a4e37d9a2dbabe961.json ADDED Viewed

	@@ -0,0 +1,95 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen3-Embedding-8B",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen3ForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "dtype": "bfloat16",
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "qwen3",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 16,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen3-Embedding-8B",
+    "checkpoint_revision": "1d8ad4ca9b3dd8059ad90a75d4983776a23d44af",
+    "continuous_batching": false,
+    "ep_degree": 1,
+    "fused_qkv": true,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 16,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.2.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 32,
+  "num_hidden_layers": 36,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151665
+}

neuronxcc-2.21.18209.0+043b1bf7/MODULE_06cc3d220bd81f31483c+fb4cc044/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.18209.0+043b1bf7/MODULE_06cc3d220bd81f31483c+fb4cc044/model.done ADDED Viewed

File without changes

neuronxcc-2.21.18209.0+043b1bf7/MODULE_06cc3d220bd81f31483c+fb4cc044/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7607df5f2c7c5132d87730619aa3366ddb2035e223a239aa5a14a1c9d7469d57
+size 848773

neuronxcc-2.21.18209.0+043b1bf7/MODULE_06cc3d220bd81f31483c+fb4cc044/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7d799faaff5e25a7531d12f187cea88faf79b29e045c2b88d2c3ea0b8bd94cab
+size 5786624

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.hlo_module.pb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:00ef644e5979852b4384b292b9654db3d05671a18e7c2997041f8228841c496b
 size 84807

 version https://git-lfs.github.com/spec/v1
+oid sha256:ceb0a23b716925f843a16e15f9f0385f067950c8f9f55cd7b3db583ba03b1a7e
 size 84807

neuronxcc-2.21.18209.0+043b1bf7/MODULE_0e5b82b136a38372d842+6e4949b4/model.neff CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d8c78397b4802378ef5aed6265b491f7d9f2c88d33d42c672b8394aa40fbad78
 size 646144

 version https://git-lfs.github.com/spec/v1
+oid sha256:0f26bc2e40e6e21bffa40348dd28785a0c5ba6ae26126553c85435559849af5d
 size 646144

neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/model.neff CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be0a2ca1ec6303e7c7f1f96a0c58635b22fcc2c5566988781cf2876a2f11f9d6
 size 277504

 version https://git-lfs.github.com/spec/v1
+oid sha256:690971e38e78e6e8e2e0b2249904244dbe845f18a1d36cb2b763deac5fe550f3
 size 277504

neuronxcc-2.21.18209.0+043b1bf7/MODULE_306829ff9e3811933fe7+a02c3a36/wrapped_neff.hlo CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1941f77c35018b572d2296e153bda0c1a82b578f3486e690f2e095e5936a2009
 size 285854

 version https://git-lfs.github.com/spec/v1
+oid sha256:937aba417c7465954a0bf71e8d536e6ce2db41d44b477deb724f8f6154f76e19
 size 285854

neuronxcc-2.21.18209.0+043b1bf7/MODULE_315d9491c827c077575a+fb4cc044/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.18209.0+043b1bf7/MODULE_315d9491c827c077575a+fb4cc044/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:949343b64aaeb75ac893058b4181a58759ab30c14ab879041244bb9af4356695
+size 850786

neuronxcc-2.21.18209.0+043b1bf7/MODULE_315d9491c827c077575a+fb4cc044/model.log ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_315d9491c827c077575a+fb4cc044.hlo_module.pb', '--output', '/tmp/nxd_model/encoding/_tp0_bk0/model.MODULE_315d9491c827c077575a+fb4cc044.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']: [XCG815] Estimated peak HBM usage (19.257GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
2	+ 2025-11-07T14:10:10Z Non-signal exit. Backend exited with code 1 and stderr: [XCG815] Estimated peak HBM usage (19.257GB) exceeds 16GB. Neff might be unable to load on chip. If you believe this estimation to be inaccurate, you can disable the check using: `--internal-backend-options=' --disable-hbm-usage-check '` - Please open a support ticket at https://github.com/aws-neuron/aws-neuron-sdk/issues/new. You may also be able to obtain more information using the 'XLA_IR_DEBUG' and 'XLA_HLO_DEBUG' environment variables.
3	+

neuronxcc-2.21.18209.0+043b1bf7/MODULE_3321b4e97123c041192d+fb4cc044/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.18209.0+043b1bf7/MODULE_3321b4e97123c041192d+fb4cc044/model.done ADDED Viewed

File without changes

neuronxcc-2.21.18209.0+043b1bf7/MODULE_3321b4e97123c041192d+fb4cc044/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:34e6c39ae5aaa2365e0018b111269da40c42b131faed18711a8810bedd68a436
+size 429397

neuronxcc-2.21.18209.0+043b1bf7/MODULE_3321b4e97123c041192d+fb4cc044/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0135e8fa145738be49848914c2dbb2c52c3854a2403644f1842244e432ed0fe
+size 1352704

neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/model.neff CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d03c6300c55f1b499d9c5d804143ff8484a494001339977d2f2296b1ce816fce
 size 246784

 version https://git-lfs.github.com/spec/v1
+oid sha256:adc574992e72adfe26c916de4e344ec745c5f0367620c1183437c3127f9fef15
 size 246784

neuronxcc-2.21.18209.0+043b1bf7/MODULE_34362baee2ad4784108e+a02c3a36/wrapped_neff.hlo CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2bdbe143e93e40a8157c3303918d7b4ab8d3c4603949e1cf14e9bcf86f43e8aa
 size 255104

 version https://git-lfs.github.com/spec/v1
+oid sha256:6e3995a13290761f8f6f89854871f6a5da48002e637efad7b67cec72e826cc1e
 size 255104

neuronxcc-2.21.18209.0+043b1bf7/MODULE_394461f5b793e1c6ddbd+fb4cc044/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.18209.0+043b1bf7/MODULE_394461f5b793e1c6ddbd+fb4cc044/model.done ADDED Viewed

File without changes

neuronxcc-2.21.18209.0+043b1bf7/MODULE_394461f5b793e1c6ddbd+fb4cc044/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a36ee034700342382b6da2932f74a5d63b34f8b66d47da8dcd5b0cf42842dd7
+size 428915

neuronxcc-2.21.18209.0+043b1bf7/MODULE_394461f5b793e1c6ddbd+fb4cc044/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e2978f644a5df5278f76096838837e039d1d5a3dc2cf4e9c2a6cae1784f9bec0
+size 4199424

neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/model.neff CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c3d4458ffd347738badcf0b23d0d413a4d36cf853e122a45c2206886aebff211
 size 216064

 version https://git-lfs.github.com/spec/v1
+oid sha256:40684a5208697e733fd92b531e426c9a49e2e2f3dce8fdc81afe434818f265f2
 size 216064

neuronxcc-2.21.18209.0+043b1bf7/MODULE_3e3c9e9db5ebafa6c488+a02c3a36/wrapped_neff.hlo CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0fe259afa434f354cd2ea4cbfa55677adfcd4de255474e7d13f3ee086220a609
 size 224412

 version https://git-lfs.github.com/spec/v1
+oid sha256:e490cb2df34ae3582e58811ed1a9ea678ca780e9525b51113490839860539904
 size 224412

neuronxcc-2.21.18209.0+043b1bf7/MODULE_45c1a0c25eab451751df+fb4cc044/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.18209.0+043b1bf7/MODULE_45c1a0c25eab451751df+fb4cc044/model.done ADDED Viewed

File without changes

neuronxcc-2.21.18209.0+043b1bf7/MODULE_45c1a0c25eab451751df+fb4cc044/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5f01851ab7ef0b0809a16c7e907029400056497364b603f59ab2c9f38e6cfc9d
+size 427538

neuronxcc-2.21.18209.0+043b1bf7/MODULE_45c1a0c25eab451751df+fb4cc044/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:37d530cd99cdeac86375ed3d4ee421b5db73c2cc8ec537732e3f811d227f4c1e
+size 4158464

neuronxcc-2.21.18209.0+043b1bf7/MODULE_574e9a764503b48fbba9+24129607/model.hlo_module.pb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b830e756a066effbee9464b2f0ee872654f088ddd85afe47df42d75321a6961b
 size 1061093

 version https://git-lfs.github.com/spec/v1
+oid sha256:87c27fc5e9a14dd0d728587d4b5e633e65e6902974129e23c485ea65109a7a63
 size 1061093

neuronxcc-2.21.18209.0+043b1bf7/MODULE_574e9a764503b48fbba9+24129607/model.neff CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2f3d938502148f35e5dcbd5ffabf9a05f9faf681ad9f9e99b65cf52a927b8cce
 size 9473024

 version https://git-lfs.github.com/spec/v1
+oid sha256:13ca12a9af7e210d81d5d48764b74bea3ad2c8dee1d5f6ec04b511070e5aa4ca
 size 9473024

neuronxcc-2.21.18209.0+043b1bf7/MODULE_5988b4b9e2fe27c7105c+fb4cc044/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.18209.0+043b1bf7/MODULE_5988b4b9e2fe27c7105c+fb4cc044/model.done ADDED Viewed

File without changes

neuronxcc-2.21.18209.0+043b1bf7/MODULE_5988b4b9e2fe27c7105c+fb4cc044/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dd6b6bf0feffa5e590b798a1c60710db88bec1cb49364a0829ce08a05370a91d
+size 435194

neuronxcc-2.21.18209.0+043b1bf7/MODULE_5988b4b9e2fe27c7105c+fb4cc044/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:76f32b334d4b29c0442354fbb4970d0103862810dbab9dd6e1d90f2943d2fad5
+size 4731904

neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/model.neff CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4108281a03ea281002e10678a110e666893df5d987032383a0e7ad4e096dbeaa
 size 369664

 version https://git-lfs.github.com/spec/v1
+oid sha256:c0d1a66ec4620a7bbd95ddcd3f0b8563e7b9fd48c9167bbb83e09de5ea8f2045
 size 369664

neuronxcc-2.21.18209.0+043b1bf7/MODULE_5b1c456704a89716d745+2dde74c7/wrapped_neff.hlo CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:90c34fd1fb81a015236d8dc844fa950ab15b39579c066d78a8bec47f68c41d2c
 size 379362

 version https://git-lfs.github.com/spec/v1
+oid sha256:6d780e7908fb2043598326a014149c1adc7cf79ede14eaac7e57a0efd5aaaf1f
 size 379362

neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.hlo_module.pb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f1fe38c017b84f73b7d3ea59c6211db2ed9d5c219e724f7fa46268df0122085b
 size 694128

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b57a81178b87e2fa8d72c53f158790060e6f323a7ea625353c0c2ecec75b33d
 size 694128

neuronxcc-2.21.18209.0+043b1bf7/MODULE_5f6f4f62aba29e11d946+24129607/model.neff CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d1e4c997fc85f924fab368f5e7c1be4735d973e2370b8619493ad63aa1e5acbf
 size 625664

 version https://git-lfs.github.com/spec/v1
+oid sha256:b94b84c6a5ac7142334a119a16754827bff07c49ce7dd2d1b9f71d6c2d29a755
 size 625664

neuronxcc-2.21.18209.0+043b1bf7/MODULE_616029436a1201c3f014+fb4cc044/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/encoding/_tp0_bk0/log-neuron-cc.txt"]

neuronxcc-2.21.18209.0+043b1bf7/MODULE_616029436a1201c3f014+fb4cc044/model.done ADDED Viewed

File without changes

neuronxcc-2.21.18209.0+043b1bf7/MODULE_616029436a1201c3f014+fb4cc044/model.hlo_module.pb ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9ec5e1a252b77227b0d8bf55d0ad8984aaaf05d34c12fbff567fc48bf7f1f98e
+size 431779

neuronxcc-2.21.18209.0+043b1bf7/MODULE_616029436a1201c3f014+fb4cc044/model.neff ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b15884b84559dd7029720a5a83714e250e569eee383b805015a1b0a990e58ebc
+size 4158464

neuronxcc-2.21.18209.0+043b1bf7/MODULE_6b37881de0fe95bd8128+24129607/compile_flags.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding/_tp0_bk0/log-neuron-cc.txt"]