Add Neuron cache for cache_mirror

🤖 Neuron Cache Bot: Adding compiled Neuron cache artifacts.

This PR adds the compiled neuronx-cc cache files for `klue/roberta-small` (task: fill-mask, compiler version 2.19.8089.0+8ab9f450); they can be used to speed up model loading on AWS Neuron devices.

Files changed (8)

.gitattributes +1 -0
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0/roberta/klue/roberta-small/9ec67f86344a927b0236.json +51 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ec67f86344a927b0236/config.json +55 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ec67f86344a927b0236/model.neuron +3 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ec67f86344a927b0236/special_tokens_map.json +51 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ec67f86344a927b0236/tokenizer.json +0 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ec67f86344a927b0236/tokenizer_config.json +60 -0
neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ec67f86344a927b0236/vocab.txt +0 -0

.gitattributes CHANGED Viewed

@@ -44,3 +44,4 @@ neuronxcc-2.16.372.0+4a9b2326/MODULE_7bebb00dc9acea881b5a/model.neuron filter=lf
 neuronxcc-2.16.372.0+4a9b2326/MODULE_42c7c80374d4e8712757/model.neuron filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.16.372.0+4a9b2326/MODULE_fce72056449004b51385/model.neuron filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.19.8089.0+8ab9f450/MODULE_925eeddec3d7c8386722/model.neuron filter=lfs diff=lfs merge=lfs -text

 neuronxcc-2.16.372.0+4a9b2326/MODULE_42c7c80374d4e8712757/model.neuron filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.16.372.0+4a9b2326/MODULE_fce72056449004b51385/model.neuron filter=lfs diff=lfs merge=lfs -text
 neuronxcc-2.19.8089.0+8ab9f450/MODULE_925eeddec3d7c8386722/model.neuron filter=lfs diff=lfs merge=lfs -text
+neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ec67f86344a927b0236/model.neuron filter=lfs diff=lfs merge=lfs -text

neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.0/roberta/klue/roberta-small/9ec67f86344a927b0236.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "klue/roberta-small",
+  "_task": "fill-mask",
+  "architectures": [
+    "RobertaForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "neuron": {
+    "auto_cast": null,
+    "auto_cast_type": null,
+    "compiler_type": "neuronx-cc",
+    "compiler_version": "2.19.8089.0+8ab9f450",
+    "disable_fallback": false,
+    "disable_fast_relayout": false,
+    "dynamic_batch_size": false,
+    "inline_weights_to_neff": true,
+    "input_names": [
+      "input_ids",
+      "attention_mask"
+    ],
+    "model_type": "roberta",
+    "optlevel": "2",
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_names": [
+      "logits"
+    ],
+    "static_batch_size": 1,
+    "static_sequence_length": 128,
+    "task": "fill-mask",
+    "tensor_parallel_size": 1
+  },
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
+  "position_embedding_type": "absolute",
+  "tokenizer_class": "BertTokenizer",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 32000
+}

neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ec67f86344a927b0236/config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "_attn_implementation_autoset": true,
+  "architectures": [
+    "RobertaForMaskedLM"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "neuron": {
+    "auto_cast": null,
+    "auto_cast_type": null,
+    "compiler_type": "neuronx-cc",
+    "compiler_version": "2.19.8089.0+8ab9f450",
+    "disable_fallback": false,
+    "disable_fast_relayout": false,
+    "dynamic_batch_size": false,
+    "inline_weights_to_neff": true,
+    "input_names": [
+      "input_ids",
+      "attention_mask"
+    ],
+    "model_type": "roberta",
+    "optlevel": "2",
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_names": [
+      "logits"
+    ],
+    "static_batch_size": 1,
+    "static_sequence_length": 128,
+    "task": "fill-mask",
+    "tensor_parallel_size": 1
+  },
+  "num_attention_heads": 12,
+  "num_hidden_layers": 6,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
+  "tokenizer_class": "BertTokenizer",
+  "torch_dtype": "float32",
+  "torchscript": true,
+  "transformers_version": "4.51.3",
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 32000
+}

neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ec67f86344a927b0236/model.neuron ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e1a9b1bb1c2999b593c67ca7ff1fa1a32dddcf0df9900e676dcecc73081e7ea4
+size 233266810

neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ec67f86344a927b0236/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "[CLS]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "[MASK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "[PAD]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "[SEP]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "[UNK]",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ec67f86344a927b0236/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ec67f86344a927b0236/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,60 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "4": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "[CLS]",
+  "clean_up_tokenization_spaces": false,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": false,
+  "eos_token": "[SEP]",
+  "extra_special_tokens": {},
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ec67f86344a927b0236/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff