Commit cb98100 · 1 Parent(s): 501c1f2

Upload folder using huggingface_hub
Changed files:
- config.json +3 -3
- llama_float16_tp2_rank0.engine +2 -2
- llama_float16_tp2_rank1.engine +2 -2
- model.cache +0 -0
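
The commit message matches the default produced by huggingface_hub's upload_folder, so the files were most likely pushed in a single call like the sketch below; the local path and repo_id are hypothetical placeholders, not values taken from this repository:

    from huggingface_hub import HfApi

    api = HfApi()
    # Pushes every file in the folder as one commit; large binaries such
    # as the .engine files are stored as Git LFS pointers (see the
    # pointer diffs below).
    api.upload_folder(
        folder_path="./llama_float16_tp2",    # hypothetical local directory
        repo_id="your-org/llama-trt-engine",  # hypothetical target repo
        commit_message="Upload folder using huggingface_hub",
    )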
config.json CHANGED

@@ -24,7 +24,7 @@
   "plugin_config": {
     "attention_qk_half_accumulation": false,
     "bert_attention_plugin": false,
-    "context_fmha_type":
+    "context_fmha_type": 0,
     "gemm_plugin": "float16",
     "gpt_attention_plugin": "float16",
     "identity_plugin": false,
@@ -32,14 +32,14 @@
     "layernorm_quantization_plugin": false,
     "lookup_plugin": false,
     "nccl_plugin": "float16",
-    "paged_kv_cache":
+    "paged_kv_cache": true,
     "quantize_per_token_plugin": false,
     "quantize_tensor_plugin": false,
     "remove_input_padding": true,
     "rmsnorm_plugin": false,
     "rmsnorm_quantization_plugin": false,
     "smooth_quant_gemm_plugin": false,
-    "tokens_per_block":
+    "tokens_per_block": 64,
     "use_custom_all_reduce": false,
     "weight_only_groupwise_quant_matmul_plugin": "float16",
     "weight_only_quant_matmul_plugin": false
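
All three edited keys live under plugin_config in the TensorRT-LLM engine build configuration: paged_kv_cache switches the KV cache to paged (block-based) allocation, tokens_per_block sets the page size to 64 tokens, and context_fmha_type selects the fused multi-head-attention variant used during the context phase. A minimal sanity check for a downloaded copy of this config, assuming config.json is in the working directory:

    import json

    with open("config.json") as f:
        cfg = json.load(f)

    plugins = cfg["plugin_config"]

    # Values introduced by this commit.
    assert plugins["context_fmha_type"] == 0
    assert plugins["paged_kv_cache"] is True
    assert plugins["tokens_per_block"] == 64
    print("plugin_config matches the committed build settings")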
llama_float16_tp2_rank0.engine CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:2ed39223086dd488a82e72c7fe581965b10db03f196771b3037112f67af58583
+size 18261922724
llama_float16_tp2_rank1.engine CHANGED

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:45bae29bdff0191252a6942f858d707233b59f1c9ce726e3de06e8dabdbf553b
+size 18261922724
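
Each engine is stored as a Git LFS pointer, so the repository tracks only the oid/size pair shown in the diffs above. A minimal sketch for verifying downloaded engines against those pointers, assuming the file names from this commit and enough disk for the roughly 18 GB files:

    import hashlib

    # sha256 oids taken from the LFS pointer diffs above.
    EXPECTED = {
        "llama_float16_tp2_rank0.engine":
            "2ed39223086dd488a82e72c7fe581965b10db03f196771b3037112f67af58583",
        "llama_float16_tp2_rank1.engine":
            "45bae29bdff0191252a6942f858d707233b59f1c9ce726e3de06e8dabdbf553b",
    }

    for name, oid in EXPECTED.items():
        digest = hashlib.sha256()
        with open(name, "rb") as f:
            # Stream in 1 MiB chunks; each engine is ~18 GB.
            for chunk in iter(lambda: f.read(1 << 20), b""):
                digest.update(chunk)
        assert digest.hexdigest() == oid, f"{name} does not match its LFS pointer"
        print(f"{name}: OK")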
model.cache CHANGED

Binary files a/model.cache and b/model.cache differ