devadigaprathamesh committed on
Commit 516a4ed · verified · 1 Parent(s): a4e4f12

Update config.json

Files changed (1)
  1. config.json +26 -62
config.json CHANGED
@@ -1,64 +1,28 @@
 {
-  "app_name": "qwen-law-finetuning",
-  "model": {
-    "base_model_id": "Qwen/Qwen2.5-7B",
-    "max_seq_length": 4096,
-    "quantization": {
-      "load_in_4bit": true,
-      "attn_implementation": "flash_attention_2",
-      "dtype": "bfloat16"
-    }
-  },
-  "peft_config": {
-    "r": 32,
-    "target_modules": [
-      "q_proj",
-      "k_proj",
-      "v_proj",
-      "o_proj",
-      "gate_proj",
-      "up_proj",
-      "down_proj"
-    ],
-    "lora_alpha": 64,
-    "lora_dropout": 0.05,
-    "bias": "none",
-    "use_gradient_checkpointing": "unsloth"
-  },
-  "training": {
-    "dataset": "viber1/indian-law-dataset",
-    "output_dir": "/data/JurisQwen",
-    "checkpoint_dir": "/data/checkpoints",
-    "num_train_epochs": 3,
-    "per_device_train_batch_size": 16,
-    "gradient_accumulation_steps": 2,
-    "optimizer": "adamw_8bit",
-    "learning_rate": 2e-4,
-    "weight_decay": 0.001,
-    "lr_scheduler_type": "cosine",
-    "warmup_ratio": 0.1,
-    "precision": {
-      "bf16": true,
-      "fp16": false,
-      "tf32": true
-    }
-  },
-  "inference": {
-    "max_new_tokens": 512
-  },
-  "compute": {
-    "gpu": "A100-40GB",
-    "training_timeout_hours": 5,
-    "inference_timeout_minutes": 10
-  },
-  "prompt_template": {
-    "user_prefix": "<|im_start|>user\n",
-    "user_suffix": "<|im_end|>",
-    "assistant_prefix": "<|im_start|>assistant\n",
-    "assistant_suffix": "<|im_end|>"
-  },
-  "volumes": {
-    "path": "/data",
-    "name": "finetune-volume"
-  }
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151643,
+  "hidden_act": "silu",
+  "hidden_size": 3584,
+  "initializer_range": 0.02,
+  "intermediate_size": 18944,
+  "max_position_embeddings": 131072,
+  "max_window_layers": 28,
+  "model_type": "qwen2",
+  "num_attention_heads": 28,
+  "num_hidden_layers": 28,
+  "num_key_value_heads": 4,
+  "rms_norm_eps": 1e-06,
+  "rope_theta": 1000000.0,
+  "sliding_window": 131072,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.40.1",
+  "use_cache": true,
+  "use_mrope": false,
+  "use_sliding_window": false,
+  "vocab_size": 152064
 }
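
For context: the deleted block was an application-level fine-tuning config, not a model config. Its "model" and "peft_config" sections read like inputs to an Unsloth training script (the "use_gradient_checkpointing": "unsloth" value points at that library). A minimal sketch of how they could be consumed, assuming an Unsloth-based script that is not part of this commit; only the values come from the deleted file:

import torch
from unsloth import FastLanguageModel

# Load the 4-bit quantized base model (values from the deleted "model" section).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="Qwen/Qwen2.5-7B",   # model.base_model_id
    max_seq_length=4096,            # model.max_seq_length
    dtype=torch.bfloat16,           # model.quantization.dtype
    load_in_4bit=True,              # model.quantization.load_in_4bit
)

# Attach LoRA adapters (values from the deleted "peft_config" section).
model = FastLanguageModel.get_peft_model(
    model,
    r=32,                           # LoRA rank
    target_modules=[                # all attention + MLP projections
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha=64,                  # effective scaling alpha / r = 2.0
    lora_dropout=0.05,
    bias="none",
    use_gradient_checkpointing="unsloth",  # Unsloth's offloaded checkpointing
)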
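The deleted "training" section maps almost one-to-one onto transformers TrainingArguments; a sketch under that assumption (the exact trainer it was paired with is not shown in this commit):

from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="/data/JurisQwen",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    gradient_accumulation_steps=2,  # effective batch size 16 * 2 = 32
    optim="adamw_8bit",             # training.optimizer ("adamw_bnb_8bit" on older transformers)
    learning_rate=2e-4,
    weight_decay=0.001,
    lr_scheduler_type="cosine",
    warmup_ratio=0.1,               # first 10% of steps warm up the LR
    bf16=True,
    fp16=False,
    tf32=True,                      # TF32 matmuls, valid on the A100 named under "compute"
)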
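The deleted "prompt_template" section is plain ChatML, the chat format Qwen models use. A sketch of rendering one viber1/indian-law-dataset example with it; the question/answer field names are placeholders, since the commit does not show the dataset schema:

# Values from the deleted "prompt_template" section.
USER_PREFIX = "<|im_start|>user\n"
USER_SUFFIX = "<|im_end|>"
ASSISTANT_PREFIX = "<|im_start|>assistant\n"
ASSISTANT_SUFFIX = "<|im_end|>"

def format_example(question: str, answer: str) -> str:
    # Field names are hypothetical; adapt to the dataset's actual columns.
    return (
        f"{USER_PREFIX}{question}{USER_SUFFIX}\n"
        f"{ASSISTANT_PREFIX}{answer}{ASSISTANT_SUFFIX}"
    )

print(format_example("What does Article 21 of the Constitution guarantee?", "..."))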
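What lands in its place is a stock Qwen2 model config whose shape matches a 7B checkpoint (28 layers, hidden size 3584, grouped-query attention with 4 KV heads), consistent with the Qwen/Qwen2.5-7B base named in the removed block. A sketch of loading and sanity-checking it with transformers; the local file path is an assumption:

from transformers import Qwen2Config

# Parse the config.json committed here (path is an assumption).
config = Qwen2Config.from_json_file("config.json")

# Shapes derived from the committed values:
head_dim = config.hidden_size // config.num_attention_heads            # 3584 // 28 = 128
gqa_groups = config.num_attention_heads // config.num_key_value_heads  # 28 // 4 = 7
print(head_dim, gqa_groups, config.max_position_embeddings)            # 128 7 131072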