nananatsu committed
Commit 1f5289c (verified) · Parent(s): 6455d12

Upload folder using huggingface_hub

Files changed (3)
  1. README.md +77 -0
  2. adapter_config.json +39 -0
  3. adapter_model.safetensors +3 -0
README.md ADDED
@@ -0,0 +1,77 @@
+ ---
+ language: zh
+ tags:
+ - qwen3
+ - lora
+ - fine-tuning
+ - translation
+ base_model: Qwen/Qwen3-8B
+ ---
+
+ # Qwen3-8B-Translator-LoRA
+
+ This model is a LoRA fine-tune of `Qwen/Qwen3-8B` for English-to-Chinese translation, tailored to audio product terminology.
+
+ ## Fine-tuning Details
+
+ - **Fine-tuning Method:** LoRA (Low-Rank Adaptation)
+ - **Dataset:** Custom parallel corpus for audio products (English-Chinese)
+ - **Framework:** PyTorch, Hugging Face Transformers, TRL, PEFT, Optimum TPU
+ - **Hardware:** Google Cloud TPU v3-8
+
+ ## Training Procedure
+
+ The model was trained with the `SFTTrainer` from the TRL library; a hedged sketch follows each configuration list below.
+
+ ### Training Hyperparameters
+
+ - `max_seq_length`: 768
+ - `per_device_train_batch_size`: 32
+ - `per_device_eval_batch_size`: 32
+ - `num_train_epochs`: 10
+ - `eval_strategy`: "steps"
+ - `eval_steps`: 10
+ - `learning_rate`: 2e-5
+ - `lr_scheduler_type`: "cosine"
+ - `warmup_ratio`: 0.1
+ - `weight_decay`: 0.01
+ - `optim`: "adamw_torch_xla"
+
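+ A minimal sketch of how these settings might map onto TRL's `SFTConfig` (argument names follow recent TRL/Transformers releases and can differ across versions; `output_dir` is a placeholder, not from the source):
+
+ ```python
+ from trl import SFTConfig
+
+ # Hedged reconstruction of the training arguments listed above.
+ training_args = SFTConfig(
+     output_dir="qwen3-8b-translator-lora",  # placeholder, not from the source
+     max_seq_length=768,
+     per_device_train_batch_size=32,
+     per_device_eval_batch_size=32,
+     num_train_epochs=10,
+     eval_strategy="steps",
+     eval_steps=10,
+     learning_rate=2e-5,
+     lr_scheduler_type="cosine",
+     warmup_ratio=0.1,
+     weight_decay=0.01,
+     optim="adamw_torch_xla",  # XLA-aware AdamW for TPU runs
+ )
+ ```
+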
+ ### LoRA Configuration
+
+ - `r`: 128
+ - `lora_alpha`: 256
+ - `lora_dropout`: 0.05
+ - `bias`: "none"
+ - `target_modules`: ["q_proj", "v_proj", "gate_proj", "down_proj"]
+ - `modules_to_save`: ["lm_head", "embed_tokens"]
+
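+ The same settings expressed as a PEFT `LoraConfig`, wired into `SFTTrainer`. The dataset file names are hypothetical stand-ins for the private corpus, and `training_args` is the `SFTConfig` from the sketch above:
+
+ ```python
+ from datasets import load_dataset
+ from peft import LoraConfig
+ from transformers import AutoModelForCausalLM
+ from trl import SFTTrainer
+
+ # LoRA settings from the list above; modules_to_save are stored as full
+ # (non-low-rank) copies alongside the adapter weights.
+ peft_config = LoraConfig(
+     r=128,
+     lora_alpha=256,
+     lora_dropout=0.05,
+     bias="none",
+     target_modules=["q_proj", "v_proj", "gate_proj", "down_proj"],
+     modules_to_save=["lm_head", "embed_tokens"],
+     task_type="CAUSAL_LM",
+ )
+
+ model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-8B")
+
+ # Hypothetical file names; assumes each record carries a formatted text field.
+ data = load_dataset("json", data_files={"train": "train.jsonl", "eval": "eval.jsonl"})
+
+ trainer = SFTTrainer(
+     model=model,
+     args=training_args,  # SFTConfig from the previous sketch
+     train_dataset=data["train"],
+     eval_dataset=data["eval"],
+     peft_config=peft_config,
+ )
+ trainer.train()
+ ```
+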
+ ## Training Results
+
+ | Step | Training Loss | Validation Loss |
+ |------|---------------|-----------------|
+ | 10   | 0.844400      | 0.635387        |
+ | 20   | 0.486000      | 0.407656        |
+ | 30   | 0.439900      | 0.381002        |
+ | 40   | 0.391100      | 0.365226        |
+ | 50   | 0.370300      | 0.352978        |
+ | 60   | 0.307100      | 0.345395        |
+ | 70   | 0.368900      | 0.340513        |
+ | 80   | 0.306000      | 0.335354        |
+ | 90   | 0.273900      | 0.333215        |
+ | 100  | 0.272400      | 0.334439        |
+ | 110  | 0.256300      | 0.331390        |
+ | 120  | 0.226100      | 0.334290        |
+ | 130  | 0.246800      | 0.338176        |
+ | 140  | 0.230500      | 0.339353        |
+
+ ## Intended Use
+
+ This model is intended for translating English text related to audio products into Chinese. It can be used by audio-industry professionals, technical writers, or anyone who needs to translate such content; a usage sketch follows.
+
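+ The repo id below is an assumption based on this upload, and Qwen3 chat-templating details (e.g. thinking mode) may differ; treat this as a sketch rather than a canonical invocation:
+
+ ```python
+ from peft import PeftModel
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ # Load the published base model and attach the LoRA adapter on top.
+ base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-8B", device_map="auto")
+ model = PeftModel.from_pretrained(base, "nananatsu/Qwen3-8B-Translator-LoRA")  # assumed repo id
+ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen3-8B")
+
+ messages = [{"role": "user", "content": "Translate to Chinese: The subwoofer crossover is set to 80 Hz."}]
+ input_ids = tokenizer.apply_chat_template(
+     messages, add_generation_prompt=True, return_tensors="pt"
+ ).to(model.device)
+
+ output = model.generate(input_ids, max_new_tokens=256)
+ print(tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True))
+ ```
+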
+ ## Limitations and Bias
+
+ - Performance is best on text close to the training data (the audio product domain).
+ - The model may not generalize well to other domains or to highly colloquial language.
+ - As with any language model, biases present in the training data may be reflected in the output.
+
adapter_config.json ADDED
@@ -0,0 +1,39 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "/kaggle/input/qwen-3/transformers/8b/1",
+   "bias": "none",
+   "corda_config": null,
+   "eva_config": null,
+   "exclude_modules": null,
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 256,
+   "lora_bias": false,
+   "lora_dropout": 0.05,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": [
+     "lm_head",
+     "embed_tokens"
+   ],
+   "peft_type": "LORA",
+   "r": 128,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "v_proj",
+     "q_proj",
+     "down_proj",
+     "gate_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "trainable_token_indices": null,
+   "use_dora": false,
+   "use_rslora": false
+ }
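
Note that `base_model_name_or_path` records the local Kaggle path used at training time, so loaders that resolve the base model from this config (such as PEFT's `AutoPeftModelForCausalLM`) will not find it outside that environment. A hedged workaround, assuming the repo id, is to load the published base model explicitly and optionally merge the adapter for standalone deployment:

```python
from peft import PeftModel
from transformers import AutoModelForCausalLM

# /kaggle/input/qwen-3/transformers/8b/1 only exists inside the original
# Kaggle session; load the published base weights instead.
base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen3-8B")
model = PeftModel.from_pretrained(base, "nananatsu/Qwen3-8B-Translator-LoRA")  # assumed repo id

# Optionally fold the LoRA weights into the base model so it can be
# served without the PEFT runtime.
merged = model.merge_and_unload()
merged.save_pretrained("qwen3-8b-translator-merged")
```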
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:48d382385148dbdc194f78e0a465c825f9455ed8ab63844cb73097fa2107075e
+ size 3338705488
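
This entry is a Git LFS pointer, not the weights themselves: the actual adapter file is about 3.3 GB, largely because `modules_to_save` stores full copies of `lm_head` and `embed_tokens` alongside the LoRA matrices. A hedged sketch of fetching it with `huggingface_hub` (repo id assumed as above):

```python
from huggingface_hub import hf_hub_download

# Resolves the LFS pointer above to the real safetensors file in the local cache.
local_path = hf_hub_download(
    repo_id="nananatsu/Qwen3-8B-Translator-LoRA",  # assumed repo id
    filename="adapter_model.safetensors",
)
print(local_path)
```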