See axolotl config

axolotl version: 0.7.0



# Model settings
# Base checkpoint to fine-tune. This is a local filesystem path, not a model
# name on the Hugging Face Hub.
base_model: /notebooks/plamo-2-1b-gorilla-chat2
# Class used to load the model.
model_type: AutoModelForCausalLM
# Class used to load the tokenizer.
tokenizer_type: AutoTokenizer
# Trust remote custom code when loading the model.
trust_remote_code: true

# Hugging Face Hub upload settings.
# hub_model_id is deliberately not set here: it is defined in the output
# section at the end of this config. A second definition in this stanza
# (`zamagi/fft-1`) was a duplicate key that loaders silently discarded in
# favour of the later one, so it has been removed.
hub_strategy: "end"
push_dataset_to_hub: null
hf_use_auth_token: true

# Liger integration: the plugin entry plus its individual switches.
plugins:
  - axolotl.integrations.liger.LigerPlugin
# Enabled switches.
liger_rope: true
liger_rms_norm: true
liger_swiglu: true
liger_fused_linear_cross_entropy: true
# The plain cross-entropy switch stays off while the fused linear
# cross-entropy switch above is on.
liger_cross_entropy: false

# 8-bit / 4-bit quantized loading. Both modes are disabled in this config.
# Do not load the model in 4-bit.
load_in_4bit: false
# Do not load the model in 8-bit.
load_in_8bit: false

# NOTE(review): the original comment described this as "do not require a
# strict match of weights (tolerated when e.g. extra tokens were added)" --
# confirm against the axolotl documentation for `strict`.
strict: false

chat_template: tokenizer_default

# Dataset settings
#
# Every active entry uses the conversational `chat_template` type and the
# same set of fields:
#   path                      - dataset to use (dataset name on the Hugging Face Hub)
#   type                      - chat_template: use a conversation-format dataset
#   field_messages            - name of the field that stores the conversation data
#   message_property_mappings - mapping of the property names inside a message
#       role                  - field giving the role (user / system / assistant)
#       content               - field giving the message content
#   roles_to_train            - roles that are trained on (assistant turns only)
#   train_on_eos              - `last` for every entry
#
# NOTE(review): the disabled (commented-out) entries below use
# message_field_role / message_field_content instead of the
# message_property_mappings block used by the active entries, and most of
# them lack roles_to_train / train_on_eos -- align them before re-enabling.
datasets:
  - path: Aratako/Magpie-Tanuki-Qwen2.5-72B-Answered
    type: chat_template
    field_messages: messages
    message_property_mappings:
      role: role
      content: content
    roles_to_train:
      - assistant
    train_on_eos: last

  # Disabled:
  # - path: Aratako/magpie-qwen2.5-32b-reasoning-100k-formatted
  #   type: chat_template
  #   field_messages: conversations
  #   message_field_role: role
  #   message_field_content: content
  #   roles_to_train: ["assistant"]
  #   train_on_eos: last
  # - path: Aratako/magpie-reasoning-llama-nemotron-70b-100k-filtered
  #   type: chat_template
  #   field_messages: conversations
  #   message_field_role: role
  #   message_field_content: content

  - path: Aratako/Open-Platypus-Japanese-masked-formatted
    type: chat_template
    field_messages: conversations
    message_property_mappings:
      role: role
      content: content
    roles_to_train:
      - assistant
    train_on_eos: last

  - path: llm-jp/wizardlm8x22b-logical-math-coding-sft-ja
    type: chat_template
    field_messages: messages
    message_property_mappings:
      role: role
      content: content
    roles_to_train:
      - assistant
    train_on_eos: last

  # The only entry that selects a named split.
  - path: kanhatakeyama/ramdom-to-fixed-multiturn-Calm3
    split: 20240806filtered
    type: chat_template
    field_messages: messages
    message_property_mappings:
      role: role
      content: content
    roles_to_train:
      - assistant
    train_on_eos: last

  # Disabled:
  # - path: Aratako/magpie-ultra-v0.1-formatted
  #   type: chat_template
  #   field_messages: conversations
  #   message_field_role: role
  #   message_field_content: content
  # - path: Aratako/orca-agentinstruct-1M-v1-selected
  #   type: chat_template
  #   field_messages: messages
  #   message_field_role: role
  #   message_field_content: content

  - path: llm-jp/Synthetic-JP-EN-Coding-Dataset
    type: chat_template
    field_messages: messages
    message_property_mappings:
      role: role
      content: content
    roles_to_train:
      - assistant
    train_on_eos: last

  - path: llm-jp/magpie-sft-v1.0
    type: chat_template
    field_messages: conversations
    message_property_mappings:
      role: role
      content: content
    roles_to_train:
      - assistant
    train_on_eos: last

# Dataset preparation.
shuffle_merged_datasets: true
dataset_prepared_path: /notebooks/data/fft-data
val_set_size: 0.002
dataset_keep_in_memory: false
# output_dir is deliberately not set here: it is defined in the output section
# at the end of this config. A second definition in this stanza
# (`/notebooks/data/27b-fft-out-1`) was a duplicate key that loaders silently
# discarded in favour of the later one, so it has been removed.

gpu_memory_limit: 48GiB

# Sequence length and packing. Packing is on for training and off for
# evaluation.
sequence_len: 2048
sample_packing: true
eval_sample_packing: false
pad_to_sequence_len: true

# Adapter / LoRA fields. None of them carries a value in this config; they
# are written as explicit nulls rather than bare empty keys.
adapter: null
lora_model_dir: null
lora_r: null
lora_alpha: null
lora_dropout: null
lora_target_linear: null
lora_fan_in_fan_out: null


# Training settings
gradient_accumulation_steps: 4
micro_batch_size: 8
# NOTE(review): both an epoch count and a step cap are set -- confirm which
# of the two is meant to end training.
num_epochs: 2
max_steps: 10000

optimizer: paged_adamw_8bit
# Kept in plain decimal form; some YAML 1.1 loaders read exponent notation
# without a decimal point (e.g. `1e-5`) as a string instead of a float.
learning_rate: 0.00001
# The scheduler is named explicitly because cosine_min_lr_ratio below is a
# cosine-schedule setting; previously this key was empty and the run relied
# on the framework default (recorded as `cosine` for this run).
lr_scheduler: cosine
cosine_min_lr_ratio: 0.1

train_on_inputs: false
group_by_length: false

# Numeric precision.
bf16: auto
fp16: null
tf32: false

#wandb: false
#wandb_project: 27b-fft
#wandb_entity: aratako-lm
#wandb_watch:
#wandb_name: attempt-01
#wandb_log_model:

# Checkpointing, resume and logging.
gradient_checkpointing: true
auto_resume_from_checkpoints: true
logging_steps: 1
early_stopping_patience: null
local_rank: null

# No attention backend is selected.
# NOTE(review): sample_packing is enabled elsewhere in this config while both
# attention options are unset -- confirm that packed samples are masked as
# intended for this model.
xformers_attention: null
flash_attention: null

# Checkpoint saving: step-based, every 100 steps, with a total limit of 2.
save_strategy: steps
save_steps: 100
save_total_limit: 2

# Warmup.
warmup_steps: 50

# Evaluation: every 100 steps with an eval batch size of 1.
eval_steps: 100
eval_batch_size: 1
eval_table_size: null
eval_max_new_tokens: null

# DeepSpeed is configured through the JSON file below; the FSDP keys are
# left null.
deepspeed: /notebooks/axolotl/deepspeed_configs/zero3_bf16.json
fsdp: null
fsdp_config: null

weight_decay: 0.01
debug: null


# Output settings
# (Optional) Repository name used when uploading to the Hugging Face Hub.
hub_model_id: zamagi/plamo-2-1b-gorilla-chat5
# Output directory for checkpoints and the final model.
output_dir: /notebooks/output/plamo-2-1b-gorilla-chat5

plamo-2-1b-gorilla-chat5

This model is a fine-tuned version of the local checkpoint `/notebooks/plamo-2-1b-gorilla-chat2` (see `base_model` in the config above), trained on the Aratako/Magpie-Tanuki-Qwen2.5-72B-Answered, Aratako/Open-Platypus-Japanese-masked-formatted, llm-jp/wizardlm8x22b-logical-math-coding-sft-ja, kanhatakeyama/ramdom-to-fixed-multiturn-Calm3, llm-jp/Synthetic-JP-EN-Coding-Dataset, and llm-jp/magpie-sft-v1.0 datasets. It achieves the following results on the evaluation set:

Loss: 1.2854

Model description

More information needed

Intended uses & limitations

More information needed

Training and evaluation data

More information needed

Training procedure

Training hyperparameters

The following hyperparameters were used during training:

learning_rate: 1e-05
train_batch_size: 8
eval_batch_size: 1
seed: 42
distributed_type: multi-GPU
gradient_accumulation_steps: 4
total_train_batch_size: 32
optimizer: paged_adamw_8bit (OptimizerNames.PAGED_ADAMW_8BIT) with betas=(0.9, 0.999), epsilon=1e-08, and no additional optimizer arguments
lr_scheduler_type: cosine
lr_scheduler_warmup_steps: 50
training_steps: 10000

Training results

Training Loss	Epoch	Step	Validation Loss
1.4277	0.0002	1	1.5568
1.3262	0.0196	100	1.4437
1.2695	0.0391	200	1.4289
1.4199	0.0587	300	1.4149
1.2383	0.0783	400	1.4073
1.418	0.0979	500	1.3987
1.2148	0.1174	600	1.3954
1.3301	0.1370	700	1.3906
1.3418	0.1566	800	1.3850
1.248	0.1762	900	1.3801
1.3027	0.1957	1000	1.3762
1.3965	0.2153	1100	1.3768
1.2422	0.2349	1200	1.3747
1.2969	0.2544	1300	1.3682
1.248	0.2740	1400	1.3629
1.3203	0.2936	1500	1.3582
1.2637	0.3132	1600	1.3576
1.3398	0.3327	1700	1.3559
1.1934	0.3523	1800	1.3508
1.1992	0.3719	1900	1.3525
1.1816	0.3914	2000	1.3475
1.1562	0.4110	2100	1.3441
1.373	0.4306	2200	1.3374
1.2188	0.4502	2300	1.3383
1.1738	0.4697	2400	1.3376
1.2344	0.4893	2500	1.3318
1.291	0.5089	2600	1.3289
1.2148	0.5285	2700	1.3254
1.248	0.5480	2800	1.3245
1.2988	0.5676	2900	1.3260
1.3359	0.5872	3000	1.3255
1.2109	0.6067	3100	1.3222
1.2656	0.6263	3200	1.3191
1.2109	0.6459	3300	1.3160
1.2676	0.6655	3400	1.3136
1.1426	0.6850	3500	1.3137
1.2422	0.7046	3600	1.3262
1.2188	0.7242	3700	1.3283
1.2891	0.7437	3800	1.3277
1.1758	0.7633	3900	1.3232
1.1846	0.7829	4000	1.3268
1.3418	0.8025	4100	1.3235
1.2812	0.8220	4200	1.3214
1.2793	0.8416	4300	1.3202
1.1758	0.8612	4400	1.3196
1.2188	0.8808	4500	1.3198
1.1719	0.9003	4600	1.3177
1.1738	0.9199	4700	1.3129
1.3555	0.9395	4800	1.3154
1.2207	0.9590	4900	1.3152
1.1445	0.9786	5000	1.3110
1.2891	0.9982	5100	1.3094
1.0527	1.0178	5200	1.3123
1.0527	1.0374	5300	1.3120
1.1777	1.0570	5400	1.3124
1.0879	1.0765	5500	1.3128
1.1836	1.0961	5600	1.3114
1.1406	1.1157	5700	1.3117
1.1152	1.1352	5800	1.3092
1.1387	1.1548	5900	1.3106
1.2715	1.1744	6000	1.3063
1.1855	1.1940	6100	1.3070
1.1895	1.2135	6200	1.3070
1.1309	1.2331	6300	1.3063
1.0918	1.2527	6400	1.3043
1.0977	1.2723	6500	1.3050
1.0332	1.2918	6600	1.3028
0.9697	1.3114	6700	1.3012
1.1504	1.3310	6800	1.3006
1.1152	1.3505	6900	1.3013
1.0127	1.3701	7000	1.2998
1.1387	1.3897	7100	1.2993
1.0664	1.4093	7200	1.2970
1.1299	1.4288	7300	1.2971
1.1406	1.4484	7400	1.2971
1.0684	1.4680	7500	1.2969
1.0938	1.4875	7600	1.2966
1.1221	1.5071	7700	1.2943
1.0771	1.5267	7800	1.2937
1.1211	1.5463	7900	1.2938
1.043	1.5658	8000	1.2941
1.0537	1.5854	8100	1.2924
1.0859	1.6050	8200	1.2918
1.1836	1.6246	8300	1.2911
1.2188	1.6441	8400	1.2906
1.0596	1.6637	8500	1.2912
1.041	1.6833	8600	1.2904
1.1367	1.7028	8700	1.2904
1.1006	1.7224	8800	1.2891
1.0996	1.7420	8900	1.2898
1.1387	1.7616	9000	1.2883
1.1543	1.7811	9100	1.2888
1.1328	1.8007	9200	1.2876
1.0801	1.8203	9300	1.2872
1.1855	1.8398	9400	1.2880
1.1113	1.8594	9500	1.2860
1.1289	1.8790	9600	1.2865
1.1543	1.8986	9700	1.2857
1.123	1.9181	9800	1.2856
1.0352	1.9377	9900	1.2857
0.9189	1.9573	10000	1.2854

Framework versions

Transformers 4.49.0
Pytorch 2.5.1+cu124
Datasets 3.2.0
Tokenizers 0.21.1

zamagi
/

plamo-2-1b-gorilla-chat5

plamo-2-1b-gorilla-chat5

Model description

Intended uses & limitations

Training and evaluation data

Training procedure

Training hyperparameters

Training results

Framework versions

Datasets used to train zamagi/plamo-2-1b-gorilla-chat5

Evaluation results