2025-04-24 10:24:20,886 INFO [train.py:653] { "allowed_excess_duration_ratio": 0.1, "audio_key": "question_audio", "batch_idx_train": 0, "best_train_epoch": -1, "best_train_loss": Infinity, "best_valid_epoch": -1, "best_valid_loss": Infinity, "bucketing_sampler": true, "deepscale": false, "deepscale_config": null, "deepspeed": true, "deepspeed_config": "./slam_omni/ds_config_zero1.json", "drop_last": true, "enable_musan": false, "enable_spec_aug": true, "enable_speech_output": true, "encoder_projector_ds_rate": 8, "env_info": { "IP address": "0.114.183.253", "hostname": "7518205", "icefall-git-branch": null, "icefall-git-date": null, "icefall-git-sha1": null, "icefall-path": "/workspace/slam/icefall_omni", "k2-build-type": "Release", "k2-git-date": "Tue Oct 29 09:02:19 2024", "k2-git-sha1": "75e2ed6b2fd87c22b7f3f34bad48a69984bb8755", "k2-path": "/opt/conda/lib/python3.11/site-packages/k2/__init__.py", "k2-version": "1.24.4", "k2-with-cuda": true, "lhotse-path": "/workspace/slam/lhotse/lhotse/__init__.py", "lhotse-version": "1.30.0.dev+git.13c7616f.dirty", "python-version": "3.11", "torch-cuda-available": true, "torch-cuda-version": "12.4", "torch-version": "2.4.0" }, "exp_dir": "slam_omni/exp_speech2speech_rerun", "frame_shift_ms": 10, "huggingface_dataset_path_or_name": "/workspace/Belle_1.4M-SLAM-Omni", "input_strategy": "PrecomputedFeatures", "llm_path_or_name": "models/Qwen2.5-0.5B-Instruct", "log_interval": 50, "manifest_dir": "data/fbank", "max_duration": 50, "num_buckets": 30, "num_epochs": 10, "num_workers": 2, "on_the_fly_feats": false, "pretrained_model_path": null, "resample_to_16kHz": true, "reset_interval": 200, "return_cuts": true, "sampler_state_dict_path": null, "seed": 42, "shuffle": true, "spec_aug_time_warp_factor": 80, "speech_encoder_path_or_name": "models/whisper/v1.1/whisper-large-v2-multi-hans-zh-epoch-3-avg-10.pt", "start_epoch": 1, "subsampling_factor": 2, "tensorboard": true, "text_key": "answer", "unfreeze_llm": true, "unfreeze_speech_projector": true, "use_flash_attn": true, "use_fp16": true, "use_lora": true, "valid_interval": 5000 } 2025-04-24 10:24:20,886 INFO [train.py:655] About to create model 2025-04-24 10:24:39,724 INFO [train.py:808] Number of model parameters: 1314396804 2025-04-24 10:24:39,724 INFO [train.py:810] Trainable parameters (excluding model.eval modules): 2025-04-24 10:24:39,725 INFO [train.py:813] llm.base_model.model.model.layers.0.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,725 INFO [train.py:813] llm.base_model.model.model.layers.0.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,725 INFO [train.py:813] llm.base_model.model.model.layers.0.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,725 INFO [train.py:813] llm.base_model.model.model.layers.0.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,725 INFO [train.py:813] llm.base_model.model.model.layers.0.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,725 INFO [train.py:813] llm.base_model.model.model.layers.0.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,725 INFO [train.py:813] llm.base_model.model.model.layers.0.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,725 INFO [train.py:813] llm.base_model.model.model.layers.0.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,725 INFO [train.py:813] llm.base_model.model.model.layers.0.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,725 INFO [train.py:813] llm.base_model.model.model.layers.0.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,725 INFO [train.py:813] llm.base_model.model.model.layers.0.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,725 INFO [train.py:813] llm.base_model.model.model.layers.0.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,725 INFO [train.py:813] llm.base_model.model.model.layers.0.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,725 INFO [train.py:813] llm.base_model.model.model.layers.0.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,725 INFO [train.py:813] llm.base_model.model.model.layers.1.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,726 INFO [train.py:813] llm.base_model.model.model.layers.1.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,726 INFO [train.py:813] llm.base_model.model.model.layers.1.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,726 INFO [train.py:813] llm.base_model.model.model.layers.1.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,726 INFO [train.py:813] llm.base_model.model.model.layers.1.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,726 INFO [train.py:813] llm.base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,726 INFO [train.py:813] llm.base_model.model.model.layers.1.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,726 INFO [train.py:813] llm.base_model.model.model.layers.1.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,726 INFO [train.py:813] llm.base_model.model.model.layers.1.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,726 INFO [train.py:813] llm.base_model.model.model.layers.1.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,726 INFO [train.py:813] llm.base_model.model.model.layers.1.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,726 INFO [train.py:813] llm.base_model.model.model.layers.1.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,726 INFO [train.py:813] llm.base_model.model.model.layers.1.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,726 INFO [train.py:813] llm.base_model.model.model.layers.1.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,726 INFO [train.py:813] llm.base_model.model.model.layers.2.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,726 INFO [train.py:813] llm.base_model.model.model.layers.2.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,726 INFO [train.py:813] llm.base_model.model.model.layers.2.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,726 INFO [train.py:813] llm.base_model.model.model.layers.2.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,726 INFO [train.py:813] llm.base_model.model.model.layers.2.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,726 INFO [train.py:813] llm.base_model.model.model.layers.2.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,726 INFO [train.py:813] llm.base_model.model.model.layers.2.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,726 INFO [train.py:813] llm.base_model.model.model.layers.2.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,727 INFO [train.py:813] llm.base_model.model.model.layers.2.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,727 INFO [train.py:813] llm.base_model.model.model.layers.2.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,727 INFO [train.py:813] llm.base_model.model.model.layers.2.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,727 INFO [train.py:813] llm.base_model.model.model.layers.2.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,727 INFO [train.py:813] llm.base_model.model.model.layers.2.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,727 INFO [train.py:813] llm.base_model.model.model.layers.2.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,727 INFO [train.py:813] llm.base_model.model.model.layers.3.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,727 INFO [train.py:813] llm.base_model.model.model.layers.3.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,727 INFO [train.py:813] llm.base_model.model.model.layers.3.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,727 INFO [train.py:813] llm.base_model.model.model.layers.3.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,727 INFO [train.py:813] llm.base_model.model.model.layers.3.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,727 INFO [train.py:813] llm.base_model.model.model.layers.3.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,727 INFO [train.py:813] llm.base_model.model.model.layers.3.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,727 INFO [train.py:813] llm.base_model.model.model.layers.3.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,727 INFO [train.py:813] llm.base_model.model.model.layers.3.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,727 INFO [train.py:813] llm.base_model.model.model.layers.3.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,727 INFO [train.py:813] llm.base_model.model.model.layers.3.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,727 INFO [train.py:813] llm.base_model.model.model.layers.3.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,727 INFO [train.py:813] llm.base_model.model.model.layers.3.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,727 INFO [train.py:813] llm.base_model.model.model.layers.3.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,727 INFO [train.py:813] llm.base_model.model.model.layers.4.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,728 INFO [train.py:813] llm.base_model.model.model.layers.4.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,728 INFO [train.py:813] llm.base_model.model.model.layers.4.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,728 INFO [train.py:813] llm.base_model.model.model.layers.4.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,728 INFO [train.py:813] llm.base_model.model.model.layers.4.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,728 INFO [train.py:813] llm.base_model.model.model.layers.4.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,728 INFO [train.py:813] llm.base_model.model.model.layers.4.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,728 INFO [train.py:813] llm.base_model.model.model.layers.4.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,728 INFO [train.py:813] llm.base_model.model.model.layers.4.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,728 INFO [train.py:813] llm.base_model.model.model.layers.4.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,728 INFO [train.py:813] llm.base_model.model.model.layers.4.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,728 INFO [train.py:813] llm.base_model.model.model.layers.4.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,728 INFO [train.py:813] llm.base_model.model.model.layers.4.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,728 INFO [train.py:813] llm.base_model.model.model.layers.4.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,728 INFO [train.py:813] llm.base_model.model.model.layers.5.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,728 INFO [train.py:813] llm.base_model.model.model.layers.5.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,728 INFO [train.py:813] llm.base_model.model.model.layers.5.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,728 INFO [train.py:813] llm.base_model.model.model.layers.5.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,728 INFO [train.py:813] llm.base_model.model.model.layers.5.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,728 INFO [train.py:813] llm.base_model.model.model.layers.5.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,728 INFO [train.py:813] llm.base_model.model.model.layers.5.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.5.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.5.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.5.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.5.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.5.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.5.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.5.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.6.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.6.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.6.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.6.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.6.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.6.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.6.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.6.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.6.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.6.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.6.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.6.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.6.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.6.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,729 INFO [train.py:813] llm.base_model.model.model.layers.7.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,730 INFO [train.py:813] llm.base_model.model.model.layers.7.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,730 INFO [train.py:813] llm.base_model.model.model.layers.7.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,730 INFO [train.py:813] llm.base_model.model.model.layers.7.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,730 INFO [train.py:813] llm.base_model.model.model.layers.7.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,730 INFO [train.py:813] llm.base_model.model.model.layers.7.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,730 INFO [train.py:813] llm.base_model.model.model.layers.7.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,730 INFO [train.py:813] llm.base_model.model.model.layers.7.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,730 INFO [train.py:813] llm.base_model.model.model.layers.7.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,730 INFO [train.py:813] llm.base_model.model.model.layers.7.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,730 INFO [train.py:813] llm.base_model.model.model.layers.7.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,730 INFO [train.py:813] llm.base_model.model.model.layers.7.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,730 INFO [train.py:813] llm.base_model.model.model.layers.7.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,730 INFO [train.py:813] llm.base_model.model.model.layers.7.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,730 INFO [train.py:813] llm.base_model.model.model.layers.8.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,730 INFO [train.py:813] llm.base_model.model.model.layers.8.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,730 INFO [train.py:813] llm.base_model.model.model.layers.8.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,730 INFO [train.py:813] llm.base_model.model.model.layers.8.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,730 INFO [train.py:813] llm.base_model.model.model.layers.8.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,730 INFO [train.py:813] llm.base_model.model.model.layers.8.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,730 INFO [train.py:813] llm.base_model.model.model.layers.8.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,731 INFO [train.py:813] llm.base_model.model.model.layers.8.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,731 INFO [train.py:813] llm.base_model.model.model.layers.8.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,731 INFO [train.py:813] llm.base_model.model.model.layers.8.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,731 INFO [train.py:813] llm.base_model.model.model.layers.8.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,731 INFO [train.py:813] llm.base_model.model.model.layers.8.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,731 INFO [train.py:813] llm.base_model.model.model.layers.8.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,731 INFO [train.py:813] llm.base_model.model.model.layers.8.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,731 INFO [train.py:813] llm.base_model.model.model.layers.9.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,731 INFO [train.py:813] llm.base_model.model.model.layers.9.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,731 INFO [train.py:813] llm.base_model.model.model.layers.9.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,731 INFO [train.py:813] llm.base_model.model.model.layers.9.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,731 INFO [train.py:813] llm.base_model.model.model.layers.9.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,731 INFO [train.py:813] llm.base_model.model.model.layers.9.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,731 INFO [train.py:813] llm.base_model.model.model.layers.9.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,731 INFO [train.py:813] llm.base_model.model.model.layers.9.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,731 INFO [train.py:813] llm.base_model.model.model.layers.9.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,731 INFO [train.py:813] llm.base_model.model.model.layers.9.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,731 INFO [train.py:813] llm.base_model.model.model.layers.9.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,731 INFO [train.py:813] llm.base_model.model.model.layers.9.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,731 INFO [train.py:813] llm.base_model.model.model.layers.9.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,731 INFO [train.py:813] llm.base_model.model.model.layers.9.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,732 INFO [train.py:813] llm.base_model.model.model.layers.10.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,732 INFO [train.py:813] llm.base_model.model.model.layers.10.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,732 INFO [train.py:813] llm.base_model.model.model.layers.10.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,732 INFO [train.py:813] llm.base_model.model.model.layers.10.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,732 INFO [train.py:813] llm.base_model.model.model.layers.10.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,732 INFO [train.py:813] llm.base_model.model.model.layers.10.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,732 INFO [train.py:813] llm.base_model.model.model.layers.10.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,732 INFO [train.py:813] llm.base_model.model.model.layers.10.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,732 INFO [train.py:813] llm.base_model.model.model.layers.10.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,732 INFO [train.py:813] llm.base_model.model.model.layers.10.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,732 INFO [train.py:813] llm.base_model.model.model.layers.10.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,732 INFO [train.py:813] llm.base_model.model.model.layers.10.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,732 INFO [train.py:813] llm.base_model.model.model.layers.10.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,732 INFO [train.py:813] llm.base_model.model.model.layers.10.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,732 INFO [train.py:813] llm.base_model.model.model.layers.11.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,732 INFO [train.py:813] llm.base_model.model.model.layers.11.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,732 INFO [train.py:813] llm.base_model.model.model.layers.11.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,732 INFO [train.py:813] llm.base_model.model.model.layers.11.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,732 INFO [train.py:813] llm.base_model.model.model.layers.11.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,732 INFO [train.py:813] llm.base_model.model.model.layers.11.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.11.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.11.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.11.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.11.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.11.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.11.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.11.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.11.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.12.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.12.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.12.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.12.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.12.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.12.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.12.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.12.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.12.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.12.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.12.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.12.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.12.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,733 INFO [train.py:813] llm.base_model.model.model.layers.12.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,734 INFO [train.py:813] llm.base_model.model.model.layers.13.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,734 INFO [train.py:813] llm.base_model.model.model.layers.13.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,734 INFO [train.py:813] llm.base_model.model.model.layers.13.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,734 INFO [train.py:813] llm.base_model.model.model.layers.13.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,734 INFO [train.py:813] llm.base_model.model.model.layers.13.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,734 INFO [train.py:813] llm.base_model.model.model.layers.13.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,734 INFO [train.py:813] llm.base_model.model.model.layers.13.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,734 INFO [train.py:813] llm.base_model.model.model.layers.13.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,734 INFO [train.py:813] llm.base_model.model.model.layers.13.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,734 INFO [train.py:813] llm.base_model.model.model.layers.13.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,734 INFO [train.py:813] llm.base_model.model.model.layers.13.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,734 INFO [train.py:813] llm.base_model.model.model.layers.13.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,734 INFO [train.py:813] llm.base_model.model.model.layers.13.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,734 INFO [train.py:813] llm.base_model.model.model.layers.13.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,734 INFO [train.py:813] llm.base_model.model.model.layers.14.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,734 INFO [train.py:813] llm.base_model.model.model.layers.14.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,734 INFO [train.py:813] llm.base_model.model.model.layers.14.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,734 INFO [train.py:813] llm.base_model.model.model.layers.14.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,734 INFO [train.py:813] llm.base_model.model.model.layers.14.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,734 INFO [train.py:813] llm.base_model.model.model.layers.14.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,735 INFO [train.py:813] llm.base_model.model.model.layers.14.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,735 INFO [train.py:813] llm.base_model.model.model.layers.14.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,735 INFO [train.py:813] llm.base_model.model.model.layers.14.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,735 INFO [train.py:813] llm.base_model.model.model.layers.14.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,735 INFO [train.py:813] llm.base_model.model.model.layers.14.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,735 INFO [train.py:813] llm.base_model.model.model.layers.14.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,735 INFO [train.py:813] llm.base_model.model.model.layers.14.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,735 INFO [train.py:813] llm.base_model.model.model.layers.14.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,735 INFO [train.py:813] llm.base_model.model.model.layers.15.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,735 INFO [train.py:813] llm.base_model.model.model.layers.15.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,735 INFO [train.py:813] llm.base_model.model.model.layers.15.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,735 INFO [train.py:813] llm.base_model.model.model.layers.15.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,735 INFO [train.py:813] llm.base_model.model.model.layers.15.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,735 INFO [train.py:813] llm.base_model.model.model.layers.15.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,735 INFO [train.py:813] llm.base_model.model.model.layers.15.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,735 INFO [train.py:813] llm.base_model.model.model.layers.15.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,735 INFO [train.py:813] llm.base_model.model.model.layers.15.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,735 INFO [train.py:813] llm.base_model.model.model.layers.15.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,735 INFO [train.py:813] llm.base_model.model.model.layers.15.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,735 INFO [train.py:813] llm.base_model.model.model.layers.15.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,735 INFO [train.py:813] llm.base_model.model.model.layers.15.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,736 INFO [train.py:813] llm.base_model.model.model.layers.15.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,736 INFO [train.py:813] llm.base_model.model.model.layers.16.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,736 INFO [train.py:813] llm.base_model.model.model.layers.16.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,736 INFO [train.py:813] llm.base_model.model.model.layers.16.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,736 INFO [train.py:813] llm.base_model.model.model.layers.16.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,736 INFO [train.py:813] llm.base_model.model.model.layers.16.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,736 INFO [train.py:813] llm.base_model.model.model.layers.16.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,736 INFO [train.py:813] llm.base_model.model.model.layers.16.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,736 INFO [train.py:813] llm.base_model.model.model.layers.16.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,736 INFO [train.py:813] llm.base_model.model.model.layers.16.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,736 INFO [train.py:813] llm.base_model.model.model.layers.16.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,736 INFO [train.py:813] llm.base_model.model.model.layers.16.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,736 INFO [train.py:813] llm.base_model.model.model.layers.16.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,736 INFO [train.py:813] llm.base_model.model.model.layers.16.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,736 INFO [train.py:813] llm.base_model.model.model.layers.16.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,736 INFO [train.py:813] llm.base_model.model.model.layers.17.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,736 INFO [train.py:813] llm.base_model.model.model.layers.17.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,736 INFO [train.py:813] llm.base_model.model.model.layers.17.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,736 INFO [train.py:813] llm.base_model.model.model.layers.17.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,736 INFO [train.py:813] llm.base_model.model.model.layers.17.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,736 INFO [train.py:813] llm.base_model.model.model.layers.17.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,737 INFO [train.py:813] llm.base_model.model.model.layers.17.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,737 INFO [train.py:813] llm.base_model.model.model.layers.17.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,737 INFO [train.py:813] llm.base_model.model.model.layers.17.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,737 INFO [train.py:813] llm.base_model.model.model.layers.17.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,737 INFO [train.py:813] llm.base_model.model.model.layers.17.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,737 INFO [train.py:813] llm.base_model.model.model.layers.17.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,737 INFO [train.py:813] llm.base_model.model.model.layers.17.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,737 INFO [train.py:813] llm.base_model.model.model.layers.17.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,737 INFO [train.py:813] llm.base_model.model.model.layers.18.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,737 INFO [train.py:813] llm.base_model.model.model.layers.18.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,737 INFO [train.py:813] llm.base_model.model.model.layers.18.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,737 INFO [train.py:813] llm.base_model.model.model.layers.18.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,737 INFO [train.py:813] llm.base_model.model.model.layers.18.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,737 INFO [train.py:813] llm.base_model.model.model.layers.18.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,737 INFO [train.py:813] llm.base_model.model.model.layers.18.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,737 INFO [train.py:813] llm.base_model.model.model.layers.18.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,737 INFO [train.py:813] llm.base_model.model.model.layers.18.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,737 INFO [train.py:813] llm.base_model.model.model.layers.18.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,737 INFO [train.py:813] llm.base_model.model.model.layers.18.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,737 INFO [train.py:813] llm.base_model.model.model.layers.18.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,737 INFO [train.py:813] llm.base_model.model.model.layers.18.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,738 INFO [train.py:813] llm.base_model.model.model.layers.18.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,738 INFO [train.py:813] llm.base_model.model.model.layers.19.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,738 INFO [train.py:813] llm.base_model.model.model.layers.19.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,738 INFO [train.py:813] llm.base_model.model.model.layers.19.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,738 INFO [train.py:813] llm.base_model.model.model.layers.19.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,738 INFO [train.py:813] llm.base_model.model.model.layers.19.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,738 INFO [train.py:813] llm.base_model.model.model.layers.19.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,738 INFO [train.py:813] llm.base_model.model.model.layers.19.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,738 INFO [train.py:813] llm.base_model.model.model.layers.19.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,738 INFO [train.py:813] llm.base_model.model.model.layers.19.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,738 INFO [train.py:813] llm.base_model.model.model.layers.19.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,738 INFO [train.py:813] llm.base_model.model.model.layers.19.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,738 INFO [train.py:813] llm.base_model.model.model.layers.19.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,738 INFO [train.py:813] llm.base_model.model.model.layers.19.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,738 INFO [train.py:813] llm.base_model.model.model.layers.19.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,738 INFO [train.py:813] llm.base_model.model.model.layers.20.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,738 INFO [train.py:813] llm.base_model.model.model.layers.20.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,738 INFO [train.py:813] llm.base_model.model.model.layers.20.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,738 INFO [train.py:813] llm.base_model.model.model.layers.20.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,738 INFO [train.py:813] llm.base_model.model.model.layers.20.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,738 INFO [train.py:813] llm.base_model.model.model.layers.20.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,739 INFO [train.py:813] llm.base_model.model.model.layers.20.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,739 INFO [train.py:813] llm.base_model.model.model.layers.20.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,739 INFO [train.py:813] llm.base_model.model.model.layers.20.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,739 INFO [train.py:813] llm.base_model.model.model.layers.20.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,739 INFO [train.py:813] llm.base_model.model.model.layers.20.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,739 INFO [train.py:813] llm.base_model.model.model.layers.20.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,739 INFO [train.py:813] llm.base_model.model.model.layers.20.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,739 INFO [train.py:813] llm.base_model.model.model.layers.20.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,739 INFO [train.py:813] llm.base_model.model.model.layers.21.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,739 INFO [train.py:813] llm.base_model.model.model.layers.21.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,739 INFO [train.py:813] llm.base_model.model.model.layers.21.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,739 INFO [train.py:813] llm.base_model.model.model.layers.21.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,739 INFO [train.py:813] llm.base_model.model.model.layers.21.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,739 INFO [train.py:813] llm.base_model.model.model.layers.21.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,739 INFO [train.py:813] llm.base_model.model.model.layers.21.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,739 INFO [train.py:813] llm.base_model.model.model.layers.21.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,739 INFO [train.py:813] llm.base_model.model.model.layers.21.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,739 INFO [train.py:813] llm.base_model.model.model.layers.21.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,739 INFO [train.py:813] llm.base_model.model.model.layers.21.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,739 INFO [train.py:813] llm.base_model.model.model.layers.21.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,739 INFO [train.py:813] llm.base_model.model.model.layers.21.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,740 INFO [train.py:813] llm.base_model.model.model.layers.21.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,740 INFO [train.py:813] llm.base_model.model.model.layers.22.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,740 INFO [train.py:813] llm.base_model.model.model.layers.22.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,740 INFO [train.py:813] llm.base_model.model.model.layers.22.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,740 INFO [train.py:813] llm.base_model.model.model.layers.22.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,740 INFO [train.py:813] llm.base_model.model.model.layers.22.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,740 INFO [train.py:813] llm.base_model.model.model.layers.22.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,740 INFO [train.py:813] llm.base_model.model.model.layers.22.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,740 INFO [train.py:813] llm.base_model.model.model.layers.22.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,740 INFO [train.py:813] llm.base_model.model.model.layers.22.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,740 INFO [train.py:813] llm.base_model.model.model.layers.22.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,740 INFO [train.py:813] llm.base_model.model.model.layers.22.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,740 INFO [train.py:813] llm.base_model.model.model.layers.22.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,740 INFO [train.py:813] llm.base_model.model.model.layers.22.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,740 INFO [train.py:813] llm.base_model.model.model.layers.22.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,740 INFO [train.py:813] llm.base_model.model.model.layers.23.self_attn.q_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,740 INFO [train.py:813] llm.base_model.model.model.layers.23.self_attn.q_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,740 INFO [train.py:813] llm.base_model.model.model.layers.23.self_attn.k_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,740 INFO [train.py:813] llm.base_model.model.model.layers.23.self_attn.k_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,740 INFO [train.py:813] llm.base_model.model.model.layers.23.self_attn.v_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,740 INFO [train.py:813] llm.base_model.model.model.layers.23.self_attn.v_proj.lora_B.default.weight: torch.Size([128, 64]) 2025-04-24 10:24:39,741 INFO [train.py:813] llm.base_model.model.model.layers.23.self_attn.o_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,741 INFO [train.py:813] llm.base_model.model.model.layers.23.self_attn.o_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,741 INFO [train.py:813] llm.base_model.model.model.layers.23.mlp.gate_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,741 INFO [train.py:813] llm.base_model.model.model.layers.23.mlp.gate_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,741 INFO [train.py:813] llm.base_model.model.model.layers.23.mlp.up_proj.lora_A.default.weight: torch.Size([64, 896]) 2025-04-24 10:24:39,741 INFO [train.py:813] llm.base_model.model.model.layers.23.mlp.up_proj.lora_B.default.weight: torch.Size([4864, 64]) 2025-04-24 10:24:39,741 INFO [train.py:813] llm.base_model.model.model.layers.23.mlp.down_proj.lora_A.default.weight: torch.Size([64, 4864]) 2025-04-24 10:24:39,741 INFO [train.py:813] llm.base_model.model.model.layers.23.mlp.down_proj.lora_B.default.weight: torch.Size([896, 64]) 2025-04-24 10:24:39,741 INFO [train.py:813] encoder_projector.linear1.weight: torch.Size([896, 10240]) 2025-04-24 10:24:39,741 INFO [train.py:813] encoder_projector.linear1.bias: torch.Size([896]) 2025-04-24 10:24:39,741 INFO [train.py:813] encoder_projector.linear2.weight: torch.Size([896, 896]) 2025-04-24 10:24:39,741 INFO [train.py:813] encoder_projector.linear2.bias: torch.Size([896]) 2025-04-24 10:24:39,741 INFO [train.py:813] codec_lm.model.embed_tokens.weight: torch.Size([4100, 1024]) 2025-04-24 10:24:39,741 INFO [train.py:813] codec_lm.model.layers.0.self_attn.q_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,741 INFO [train.py:813] codec_lm.model.layers.0.self_attn.q_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,741 INFO [train.py:813] codec_lm.model.layers.0.self_attn.k_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,741 INFO [train.py:813] codec_lm.model.layers.0.self_attn.k_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,741 INFO [train.py:813] codec_lm.model.layers.0.self_attn.v_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,741 INFO [train.py:813] codec_lm.model.layers.0.self_attn.v_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,741 INFO [train.py:813] codec_lm.model.layers.0.self_attn.o_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,741 INFO [train.py:813] codec_lm.model.layers.0.mlp.gate_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.0.mlp.up_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.0.mlp.down_proj.weight: torch.Size([1024, 2048]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.0.input_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.0.post_attention_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.1.self_attn.q_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.1.self_attn.q_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.1.self_attn.k_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.1.self_attn.k_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.1.self_attn.v_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.1.self_attn.v_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.1.self_attn.o_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.1.mlp.gate_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.1.mlp.up_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.1.mlp.down_proj.weight: torch.Size([1024, 2048]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.1.input_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.1.post_attention_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.2.self_attn.q_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.2.self_attn.q_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.2.self_attn.k_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.2.self_attn.k_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.2.self_attn.v_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.2.self_attn.v_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,742 INFO [train.py:813] codec_lm.model.layers.2.self_attn.o_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.2.mlp.gate_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.2.mlp.up_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.2.mlp.down_proj.weight: torch.Size([1024, 2048]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.2.input_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.2.post_attention_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.3.self_attn.q_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.3.self_attn.q_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.3.self_attn.k_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.3.self_attn.k_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.3.self_attn.v_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.3.self_attn.v_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.3.self_attn.o_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.3.mlp.gate_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.3.mlp.up_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.3.mlp.down_proj.weight: torch.Size([1024, 2048]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.3.input_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.3.post_attention_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.4.self_attn.q_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.4.self_attn.q_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.4.self_attn.k_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.4.self_attn.k_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.4.self_attn.v_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,743 INFO [train.py:813] codec_lm.model.layers.4.self_attn.v_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.4.self_attn.o_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.4.mlp.gate_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.4.mlp.up_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.4.mlp.down_proj.weight: torch.Size([1024, 2048]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.4.input_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.4.post_attention_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.5.self_attn.q_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.5.self_attn.q_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.5.self_attn.k_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.5.self_attn.k_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.5.self_attn.v_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.5.self_attn.v_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.5.self_attn.o_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.5.mlp.gate_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.5.mlp.up_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.5.mlp.down_proj.weight: torch.Size([1024, 2048]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.5.input_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.5.post_attention_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.6.self_attn.q_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.6.self_attn.q_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.6.self_attn.k_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.6.self_attn.k_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.6.self_attn.v_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.6.self_attn.v_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,744 INFO [train.py:813] codec_lm.model.layers.6.self_attn.o_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.6.mlp.gate_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.6.mlp.up_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.6.mlp.down_proj.weight: torch.Size([1024, 2048]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.6.input_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.6.post_attention_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.7.self_attn.q_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.7.self_attn.q_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.7.self_attn.k_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.7.self_attn.k_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.7.self_attn.v_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.7.self_attn.v_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.7.self_attn.o_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.7.mlp.gate_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.7.mlp.up_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.7.mlp.down_proj.weight: torch.Size([1024, 2048]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.7.input_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.7.post_attention_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.8.self_attn.q_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.8.self_attn.q_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.8.self_attn.k_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.8.self_attn.k_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.8.self_attn.v_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.8.self_attn.v_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.8.self_attn.o_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,745 INFO [train.py:813] codec_lm.model.layers.8.mlp.gate_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.8.mlp.up_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.8.mlp.down_proj.weight: torch.Size([1024, 2048]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.8.input_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.8.post_attention_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.9.self_attn.q_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.9.self_attn.q_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.9.self_attn.k_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.9.self_attn.k_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.9.self_attn.v_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.9.self_attn.v_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.9.self_attn.o_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.9.mlp.gate_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.9.mlp.up_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.9.mlp.down_proj.weight: torch.Size([1024, 2048]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.9.input_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.9.post_attention_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.10.self_attn.q_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.10.self_attn.q_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.10.self_attn.k_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.10.self_attn.k_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.10.self_attn.v_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.10.self_attn.v_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.10.self_attn.o_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,746 INFO [train.py:813] codec_lm.model.layers.10.mlp.gate_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,747 INFO [train.py:813] codec_lm.model.layers.10.mlp.up_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,747 INFO [train.py:813] codec_lm.model.layers.10.mlp.down_proj.weight: torch.Size([1024, 2048]) 2025-04-24 10:24:39,747 INFO [train.py:813] codec_lm.model.layers.10.input_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,747 INFO [train.py:813] codec_lm.model.layers.10.post_attention_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,747 INFO [train.py:813] codec_lm.model.layers.11.self_attn.q_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,747 INFO [train.py:813] codec_lm.model.layers.11.self_attn.q_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,747 INFO [train.py:813] codec_lm.model.layers.11.self_attn.k_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,747 INFO [train.py:813] codec_lm.model.layers.11.self_attn.k_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,747 INFO [train.py:813] codec_lm.model.layers.11.self_attn.v_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,747 INFO [train.py:813] codec_lm.model.layers.11.self_attn.v_proj.bias: torch.Size([1024]) 2025-04-24 10:24:39,747 INFO [train.py:813] codec_lm.model.layers.11.self_attn.o_proj.weight: torch.Size([1024, 1024]) 2025-04-24 10:24:39,747 INFO [train.py:813] codec_lm.model.layers.11.mlp.gate_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,747 INFO [train.py:813] codec_lm.model.layers.11.mlp.up_proj.weight: torch.Size([2048, 1024]) 2025-04-24 10:24:39,747 INFO [train.py:813] codec_lm.model.layers.11.mlp.down_proj.weight: torch.Size([1024, 2048]) 2025-04-24 10:24:39,747 INFO [train.py:813] codec_lm.model.layers.11.input_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,747 INFO [train.py:813] codec_lm.model.layers.11.post_attention_layernorm.weight: torch.Size([1024]) 2025-04-24 10:24:39,747 INFO [train.py:813] codec_lm.model.norm.weight: torch.Size([1024]) 2025-04-24 10:24:39,747 INFO [train.py:813] codec_lm.lm_head.weight: torch.Size([4100, 1024]) 2025-04-24 10:24:39,747 INFO [train.py:813] speech_token_projector.weight: torch.Size([1024, 1792]) 2025-04-24 10:24:39,747 INFO [train.py:813] speech_token_projector.bias: torch.Size([1024]) 2025-04-24 10:24:39,747 INFO [train.py:813] codec_lm_head.weight: torch.Size([4100, 1024]) 2025-04-24 10:24:39,747 INFO [train.py:813] codec_lm_head.bias: torch.Size([4100]) 2025-04-24 10:24:39,747 INFO [train.py:819] Device: cuda:6 2025-04-24 10:24:40,785 INFO [train.py:823] Using DeepSpeed 2025-04-24 10:24:44,788 INFO [data_module.py:445] About to get train cuts 2025-04-24 10:24:44,791 INFO [data_module.py:250] Disable MUSAN 2025-04-24 10:24:44,791 INFO [data_module.py:268] Enable SpecAugment 2025-04-24 10:24:44,791 INFO [data_module.py:269] Time warp factor: 80 2025-04-24 10:24:44,791 INFO [data_module.py:279] Num frame mask: 10 2025-04-24 10:24:44,791 INFO [data_module.py:292] About to create train dataset 2025-04-24 10:24:44,791 INFO [data_module.py:319] Using DynamicBucketingSampler. 2025-04-24 10:24:45,784 INFO [data_module.py:336] About to create train dataloader 2025-04-24 10:24:45,785 INFO [data_module.py:436] About to get test cuts 2025-04-24 10:24:45,786 INFO [data_module.py:365] About to create dev dataset 2025-04-24 10:24:45,873 INFO [data_module.py:379] About to create dev dataloader 2025-04-24 10:24:45,878 INFO [train.py:875] start training from epoch 1 2025-04-24 10:25:15,190 INFO [train.py:539] Computing validation loss 2025-04-24 10:25:21,741 INFO [train.py:548] Epoch 1, validation: loss=0.005362, acc=0.4658, codec_acc=0.0001369, codec_topk_acc=0.002175, codec_loss=0.004042, text_loss=0.00132, over 285507.00 frames. 2025-04-24 10:25:21,742 INFO [train.py:549] Maximum memory allocated so far is 11626MB 2025-04-24 10:25:22,193 INFO [train.py:611] Epoch 1, batch 0, loss[loss=0.00509, acc=0.4134, codec_acc=0, codec_topk_acc=0.00052025-04-24 10:25:22,193 INFO [train.py:611] Epoch 1, batch 0, loss[loss=0.004859, acc=0.442, codec_acc=0, codec_topk_acc=0.00408, codec_loss=0.003722, text_loss=0.001137, over 2279.00 frames. ], tot_loss[loss=0.004859, acc=0.442, codec202025-04-24 10:25:48,485 INFO [train.py:611] Epoch 1, batch 50, loss[loss=0.004133, acc=0.4768, codec_acc=0.07379, codec_topk_acc=0.1561, codec_loss=0.002983, text_loss=0.001149, over 2036.00 frames. ], tot_loss[loss=0.004392, acc=0.468, codec_acc=0.02233, codec_topk_acc=0.07936, codec_loss=0.003226, text_loss=0.001166, over 99113.87 frames. ], batch size: 3, lr: 8.36e-05,2025-042025-04-24 10:26:20,975 INFO [train.py:611] Epoch 1, batch 100, loss[loss=0.002749, acc=0.535, codec_acc=0.1943, codec_topk_acc=0.5295, codec_loss=0.001867, text_loss=0.0008826, over 2266.00 frames. ], tot_loss[loss=0.003752, acc=0.4824, codec_acc=0.08966, codec_topk_acc=0.2547, codec_loss=0.002669, text_loss=0.001084, over 176419.03 frames. ], batch size: 5, lr: 9.9322025-022025-04-24 10:26:51,215 INFO [train.py:611] Epoch 1, batch 150, loss[loss=0.002714, acc=0.638, codec_acc=0.1871, codec_topk_acc=0.585, codec_loss=0.001994, text_loss=0.0007202, over 1937.00 frames. ], tot_loss[loss=0.003351, acc=0.4877, codec_acc=0.133, codec_topk_acc=0.3835, codec_loss=0.002288, text_loss=0.001063, over 237010.50 frames. ], batch size: 3, lr: 1.0222025-02025-04-24 10:27:15,289 INFO [train.py:611] Epoch 1, batch 200, loss[loss=0.002522, acc=0.4444, codec_acc=0.2527, codec_topk_acc=0.6693, codec_loss=0.001437, text_loss=0.001086, over 2340.00 frames. ], tot_loss[loss=0.003114, acc=0.4868, codec_acc=0.1655, codec_topk_acc=0.4705, codec_loss=0.002051, text_loss=0.001063, over 283089.22 frames. ], batch size: 13, lr: 1.202520252025-04-24 10:27:39,458 INFO [train.py:611] Epoch 1, batch 250, loss[loss=0.00231, acc=0.5301, codec_acc=0.2525, codec_topk_acc=0.7025, codec_loss=0.001446, text_loss=0.0008644, over 2229.00 frames. ], tot_loss[loss=0.002944, acc=0.4909, codec_acc=0.1921, codec_topk_acc=0.5347, codec_loss=0.001884, text_loss=0.00106, over 318209.08 frames. ], batch size: 10, lr: 120252025-02025-04-24 102025-04-24 10:28:03,473 INFO [train.py:611] Epoch 1, batch 300, loss[loss=0.002492, acc=0.437, codec_acc=0.2681, codec_topk_acc=0.7107, codec_loss=0.001361, text_loss=0.001131, over 2301.00 frames. ], tot_loss[loss=0.002821, acc=0.4898, codec_acc=0.2108, codec_topk_acc=0.5803, codec_loss=0.001762, text_loss=0.001059, over 345962.89 frames. ], batch si2202025-04-2025-04-24 2025-04-24 10:28:27,748 INFO [train.py:611] Epoch 1, batch 350, loss[loss=0.001892, acc=0.6455, codec_a2025-04-242025-04-24 10:28:27,749 INFO [train.py:611] Epoch 1, batch 350, loss[loss=0.002184, acc=0.4821, codec_acc=0.2841, codec_topk_acc=0.7237, codec_loss=0.00125, text_loss=0.0009339, over 2439.00 frames. ], tot_loss[loss=0.002691, acc=0.4964, cod22025-042025-04-24 10:22025-04-24 10:28:51,487 INFO [train.py:611] Epoch 1, batch 400, loss[loss=0.002325, acc=0.5755, cod2025-04-24 12025-04-24 10:28:51,487 INFO [train.py:611] Epoch 1, batch 400, loss[loss=0.002238, acc=0.5698, codec_acc=0.2942, codec_topk_acc=0.7326, codec_loss=0.001393, text_loss=0.0008451, over 2141.00 frames. ], tot_loss[loss=0.002616, acc=0.4995, c2022025-04-24 10:29:19,42025-04-24 10:29:19,418 INFO [train.py:611] Epoch 1, batch 450, loss[loss=0.002253, acc=0.4948, codec_acc=0.2971, codec_topk_acc=0.7218, codec_loss=0.001238, text_loss=0.001015, over 2423.00 frames. ], tot_loss[loss=0.002547, acc=0.5046, codec_acc=0.2496, codec_topk_acc=0.6645, codec_loss=0.001535, text_loss=0.001013, over 398095.41 frames. ], batch s22025-04-24 10:29:42,826 INFO [train.py:611] Epoch 1, batch 500, loss[loss=0.002221, acc=0.429, codec_acc=0.2907, codec_topk_acc=0.7454, codec_loss=0.001199, text_loss=0.001022, over 2451.00 frames. ], tot_loss[loss=0.002493, acc=0.5068, codec_acc=0.2589, codec_topk_acc=0.6824, codec_loss=0.001488, text_loss=0.001004, over 407640.15 frames. ], batch size: 10, lr: 1.00e-04, 22025-04-24 10:30:07,436 INFO [train.py:611] Epoch 1, batch 550, loss[loss=0.002213, acc=0.4904, codec_acc=0.3016, codec_topk_acc=0.7562025-04-24 10:30:07,436 INFO [train.py:611] Epoch 1, batch 550, loss[loss=0.002336, acc=0.4527, codec_acc=0.2926, codec_topk_acc=0.7477, codec_loss=0.001275, text_loss=0.00106, over 2282.00 frames. ], tot_loss[loss=0.00245, acc=0.5074, codec_a22025-042025-04-24 10:30:31,574 INFO [train.py:611] Epoch 1, batch 600, loss[loss=0.002241, acc=0.516, codec_acc=0.3012, codec_topk_2025-04-24 10:30:31,575 INFO [train.py:611] Epoch 1, batch 600, loss[loss=0.002451, acc=0.488, codec_acc=0.288, codec_topk_acc=0.7507, codec_loss=0.00132, text_loss=0.001132, over 2211.00 frames. ], tot_loss[loss=0.002418, acc=0.5087, codec_ac22022025-04-24 10:30:54,936 INFO [train.py:611] Epoch 1, batch 650, loss[loss=0.002215, acc=0.5481, codec_acc=0.3004, codec_topk_acc=0.7513, codec_loss=0.001311, text_loss=0.0009039, over 2200.00 frames. ], tot_loss[loss=0.002357, acc=0.5216, codec_acc=0.2778, codec_topk_acc=0.7185, codec_loss=0.001395, text_loss=0.0009619, over 426044.38 frames. ], batch size: 6, lr: 1.02025-04-24 102025-04-24 10:31:22,917 INFO [train.py:611] Epoch 1, batch 700, loss[loss=0.001778, acc=0.6486, codec_acc=0.3402, codec_topk_acc=0.7675, codec_loss=0.001191, text_loss=0.0005867, over 2298.00 frames. ], tot_loss[loss=0.002321, acc=0.521, codec_acc=0.2817, codec_topk_acc=0.7259, codec_loss=0.001366, text_loss=0.0009549, over 432263.92 frames. ], batch size: 5, lr:2025-04-24 120252025-04-24 10:31:50,210 INFO [train.py:611] Epoch 1, batch 750, loss[loss=0.002454, acc=0.4933, codec_acc=0.3011, codec_topk_acc=0.7417, codec_loss=0.001356, text_loss=0.001098, over 2141.00 frames. ], tot_loss[loss=0.002314, acc=0.5217, codec_acc=0.2855, codec_topk_acc=0.733, codec_loss=0.001356, text_loss=0.0009587, over 432626.25 frames. ], batch size: 2025-04-22025-04-24 10:32:14,261 INFO [train.py:611] Epoch 1, batch 800, loss[loss=0.002465, acc=0.4333, codec_acc=0.3007, codec_topk_acc=0.7625, codec_loss=0.001275, text_loss=0.00119, over 2217.00 frames. ], tot_loss[loss=0.002318, acc=0.5208, codec_acc=0.2886, codec_topk_acc=0.7388, codec_loss=0.00135, text_loss=0.0009677, over 431793.26 frames. ], batch size: 4, lr: 1.02025-02025-04-242025-04-24 10:32:45,233 INFO [train.py:611] Epoch 1, batch 850, loss[loss=0.002389, acc=0.4684, codec_acc=0.2851, codec_topk_acc=0.7523, codec_loss=0.001309, text_loss=0.001079, over 2201.00 frames. ], tot_loss[loss=0.002265, acc=0.5289, codec_acc=0.2919, codec_topk_acc=0.7432, codec_loss=0.00133, text_loss=0.0009348, over 435962.88 frames. ], batch size: 8, 2025-02025-04-24 10:2025-04-24 10:33:10,418 INFO [train.py:611] Epoch 1, batch 900, loss[loss=0.001856, acc=0.6609, codec_acc=0.3064, codec_topk_acc=0.7668, codec_loss=0.001245, text_loss=0.0006107, over 2275.00 frames. ], tot_loss[loss=0.002258, acc=0.5269, codec_acc=0.2941, codec_topk_acc=0.7484, codec_loss=0.00132, text_loss=0.0009381, over 436722.62 frames. ], batch size:2025-2025-04-24 10:32025-04-24 10:33:42,218 INFO [train.py:611] Epoch 1, batch 950, loss[loss=0.002071, acc=0.5667, codec_acc=0.3211, codec_topk_acc=0.7881, codec_loss=0.001258, text_loss=0.0008129, over 2171.00 frames. ], tot_loss[loss=0.002241, acc=0.5288, codec_acc=0.2977, codec_topk_acc=0.7534, codec_loss=0.001304, text_loss=0.0009366, over 438324.03 frames. ], batch siz2025-04-24 10:34:09,2025-04-24 10:34:09,795 INFO [train.py:611] Epoch 1, batch 1000, loss[loss=0.002193, acc=0.5582, codec_acc=0.32, codec_topk_acc=0.7652, codec_loss=0.001277, text_loss=0.0009157, over 2186.00 frames. ], tot_loss[loss=0.002222, acc=0.5298, codec_acc=0.2997, codec_topk_acc=0.756, codec_loss=0.001294, text_loss=0.0009281, over 440062.56 frames. ], batch size2025-02025-04-24 10:34:34,361 INFO [train.py:611] Epoch 1, batch 1050, loss[loss=0.002093, acc=0.5851, codec_acc=0.2971, codec_topk_acc=0.773, codec_loss=0.001283, text_loss=0.0008099, over 2175.00 frames. ], tot_loss[loss=0.002231, acc=0.5274, codec_acc=0.3022, codec_topk_acc=0.7606, codec_loss=0.001289, text_loss=0.0009418, over 438497.21 frames. ], batch size: 7, lr: 1.00e2025-02025-04-24 102025-04-24 10:34:58,649 INFO [train.py:611] Epoch 1, batch 1100, loss[loss=0.002377, acc=0.495, codec_acc=0.3098, codec_topk_acc=0.7714, codec_loss=0.001365, text_loss=0.001012, over 2037.00 frames. ], tot_loss[loss=0.002216, acc=0.5314, codec_acc=0.3045, codec_topk_acc=0.7621, codec_loss=0.001285, text_loss=0.0009307, over 438461.82 frames. ], batch size: 2025-2025-04-24 12025-04-24 10:35:23,548 INFO [train.py:611] Epoch 1, batch 1150, loss[loss=0.002022, acc=0.6703, codec_acc=0.3232, codec_topk_acc=0.7854, codec_loss=0.001318, text_loss=0.0007048, over 2063.00 frames. ], tot_loss[loss=0.002191, acc=0.5353, codec_acc=0.3058, codec_topk_acc=0.7652, codec_loss=0.001271, text_loss=0.00092, over 441086.28 frames. ], batch size:20252025-04-24 10:35:49,553 INFO [train.py:611] Epoch 1, batch 1200, loss[loss=0.002054, acc=0.5695, codec_acc=0.3197, codec_topk_acc=0.7826, codec_loss=0.001252, text_loss=0.0008016, over 2163.00 frames. ], tot_loss[loss=0.002174, acc=0.5332, codec_acc=0.308, codec_topk_acc=0.7683, codec_loss=0.001257, text_loss=0.000917, over 443389.70 frames. ], batch size: 7, lr: 1.00e-042022025-04-24 10:32025-04-24 10:36:14,014 INFO [train.py:611] Epoch 1, batch 1250, loss[loss=0.002233, acc=0.4581, codec_acc=0.3163, codec_topk_acc=0.7836, codec_loss=0.001168, text_loss=0.001065, over 2309.00 frames. ], tot_loss[loss=0.002177, acc=0.5322, codec_acc=0.3094, codec_topk_acc=0.7706, codec_loss=0.001258, text_loss=0.0009186, over 441379.86 frames. ], batch size: 92025-04-24 10:36:2025-04-24 10:36:41,646 INFO [train.py:611] Epoch 1, batch 1300, loss[loss=0.002107, acc=0.5153, codec_acc=0.3151, codec_topk_acc=0.7696, codec_loss=0.001179, text_loss=0.0009276, over 2334.00 frames. ], tot_loss[loss=0.002176, acc=0.5351, codec_acc=0.3103, codec_topk_acc=0.7717, codec_loss=0.00126, text_loss=0.0009167, over 439733.97 frames. ], batch size: 12025-2025-04-24 10:37:05,201 INFO [train.py:611] Epoch 1, batch 1350, loss[loss=0.002071, acc=0.5378, codec_acc=0.3248, codec_topk_acc=0.7999, codec_loss=0.001188, text_loss=0.0008822, over 2218.00 frames. ], tot_loss[loss=0.002159, acc=0.5346, codec_acc=0.3123, codec_topk_acc=0.7744, codec_loss=0.001244, text_loss=0.0009141, over 443031.93 frames. ], batch size: 6, lr: 1.00e2025-04-24 10:37222025-04-24 10:37:29,785 INFO [train.py:611] Epoch 1, batch 1400, loss[loss=0.001897, acc=0.7, codec_acc=0.328, codec_topk_acc=0.7849, codec_loss=0.00122, text_loss=0.0006772, over 2202.00 frames. ], tot_loss[loss=0.00215, acc=0.5366, codec_acc=0.3141, codec_topk_acc=0.776, codec_loss=0.001236, text_loss=0.0009137, over 444387.06 frames. ], batch size: 3, lr: 20252025-0422025-04-24 10:38:06,632 INFO [train.py:611] Epoch 1, batch 1450, loss[loss=0.002106, acc=0.5481, codec_acc=0.3281, codec_topk_acc=0.7846, codec_loss=0.001198, text_loss=0.0009086, over 2235.00 frames. ], tot_loss[loss=0.002129, acc=0.5401, codec_acc=0.3145, codec_topk_acc=0.7775, codec_loss=0.001232, text_loss=0.0008975, over 444960.26 frames. ], batch size: 9, lr:20252025-04-24 10:38:31,572 INFO [train.py:611] Epoch 1, batch 1500, loss[loss=0.002097, acc=0.5333, codec_acc=0.3259, codec_topk_acc=0.7789, codec_loss=0.001138, text_loss=0.0009591, over 2376.00 frames. ], tot_loss[loss=0.002125, acc=0.5385, codec_acc=0.3155, codec_topk_acc=0.7792, codec_loss=0.001225, text_loss=0.0009001, over 445814.49 frames. ], batch size: 8, lr: 1.002025-042025-04-24 10:39:04,192 INFO [train.py:611] Epoch 1, batch 1550, loss[loss=0.002262, acc=0.4383, codec_acc=0.3338,2025-04-24 10:39:04,193 INFO [train.py:611] Epoch 1, batch 1550, loss[loss=0.002145, acc=0.4703, codec_acc=0.3372, codec_topk_acc=0.7999, codec_loss=0.001094, text_loss=0.001051, over 2395.00 frames. ], tot_loss[loss=0.002122, acc=0.5413, codec_acc=0.31722025-04-22025-04-24 10:39:29,331 INFO [train.py:611] Epoch 1, batch 1600, loss[loss=0.002115, acc=0.5523, codec_acc=0.3079, codec_topk_acc=0.7808, codec_loss=0.001267, text_loss=0.0008481, over 2160.00 frames. ], tot_loss[loss=0.002137, acc=0.5366, codec_acc=0.319, codec_topk_acc=0.7825, codec_loss=0.001223, text_loss=0.0009135, over 443176.44 frames. ], batch size: 7, lr: 1.02025-04-2025-04-24 10:39:58,465 INFO [train.py:611] Epoch 1, batch 1650, loss[loss=0.002335, acc=0.474, codec_acc=0.3107,22025-04-24 10:39:58,466 INFO [train.py:611] Epoch 1, batch 1650, loss[loss=0.002121, acc=0.5347, codec_acc=0.3419, codec_topk_acc=0.7934, codec_loss=0.001177, text_loss=0.0009439, over 2212.00 frames. ], tot_loss[loss=0.00216, acc=0.5325, codec_acc=0.3182,2025-04202025-04-24 10:40:22,653 INFO [train.py:611] Epoch 1, batch 1700, loss[loss=0.002178, acc=0.5314, codec_acc=0.2025-04-24 10:40:22,654 INFO [train.py:611] Epoch 1, batch 1700, loss[loss=0.00198, acc=0.551, codec_acc=0.3416, codec_topk_acc=0.8023, codec_loss=0.001149, text_loss=0.0008306, over 2223.00 frames. ], tot_loss[loss=0.00209, acc=0.5519, codec_acc=0.3197, codec_2025-2025-04-24 10:40:47,137 INFO [train.py:611] Epoch 1, batch 1750, loss[loss=0.001792, acc=0.5634, codec_acc=0.3256, codec_topk_acc=0.7919, codec_loss=0.001084, text_loss=0.0007081, over 2460.00 frames. ], tot_loss[loss=0.002123, acc=0.5358, codec_acc=0.3209, codec_topk_acc=0.7858, codec_loss=0.001212, text_loss=0.0009102, over 444031.94 frames. ], batch size: 3, lr: 1.002025-0422025-04-24 10:41:12,084 INFO [train.py:611] Epoch 1, batch 1800, loss[loss=0.001947, acc=0.501, codec_acc=0.3194, c2025-04-24 10:41:12,085 INFO [train.py:611] Epoch 1, batch 1800, loss[loss=0.001918, acc=0.5459, codec_acc=0.3365, codec_topk_acc=0.794, codec_loss=0.001085, text_loss=0.0008321, over 2411.00 frames. ], tot_loss[loss=0.00212, acc=0.5376, codec_acc=0.32122025-04202025-04-24 10:41:36,439 INFO [train.py:611] Epoch 1, batch 1850, loss[loss=0.002037, acc=0.487, codec_acc=0.3288, codec_topk_acc=0.795, codec_loss=0.001113, text_loss=0.0009248, over 2375.00 frames. ], tot_loss[loss=0.002118, acc=0.5417, codec_acc=0.3215, codec_topk_acc=0.7864, codec_loss=0.001217, text_loss=0.0009007, over 441514.89 frames. ], batch size: 8, lr: 1.2025-02022025-04-24 102025-04-24 10:42:01,654 INFO [train.py:611] Epoch 1, batch 1900, loss[loss=0.002008, acc=0.5859, code2025-04-24 10:42:01,654 INFO [train.py:611] Epoch 1, batch 1900, loss[loss=0.002129, acc=0.5155, codec_acc=0.3221, codec_topk_acc=0.7991, codec_loss=0.001193, text_loss=0.0009362, over 2205.00 frames. ], tot_loss[loss=0.002109, acc=0.5395, codec_acc=02025-042025-02025-04-24 10:42:27,237 INFO [train.py:611] Epoch 1, batch 1950, loss[loss=0.001667, acc=0.6793, codec_acc=0.3232, codec_topk_acc=0.7926, codec_loss=0.001098, text_loss=0.0005687, over 2444.00 frames. ], tot_loss[loss=0.002095, acc=0.538, codec_acc=0.3246, codec_topk_acc=0.7902, codec_loss=0.001195, text_loss=0.0009001, over 445952.45 frames. ], batch size: 6, lr:2025-020252025-04-24 10:42025-04-24 10:42:53,516 INFO [train.py:611] Epoch 1, batch 2000, loss[loss=0.002329, acc=0.682025-04-2025-04-24 10:42:53,516 INFO [train.py:611] Epoch 1, batch 2000, loss[loss=0.003257, acc=0.41, codec_acc=0.3227, codec_topk_acc=0.7984, codec_loss=0.001571, text_loss=0.001686, over 1698.00 frames. ], tot_loss[loss=0.002101, acc=0.5364, codec_acc=0.325920252025-2025-04-24 10:2025-04-24 10:43:29,643 INFO [train.py:611] Epoch 1, batch 2050, loss[loss=0.001908, acc=0.7095, codec_acc=0.3308, codec_topk_acc=0.799, codec_loss=0.001322, text_loss=0.0005862, over 1981.00 frames. ], tot_loss[loss=0.002095, acc=0.5413, codec_acc=0.326, codec_topk_acc=0.7923, codec_loss=0.001194, text_loss=0.000901, over 444263.27 frames. ], batch 2025-042025-04-24 10:43:2025-04-24 10:43:53,364 INFO [train.py:611] Epoch 1, batch 2100, loss[loss=0.00188, acc=0.6508, codec_acc=0.3211, codec_topk_acc=0.7825, codec_loss=0.001207, text_loss=0.0006734, over 2225.00 frames. ], tot_loss[loss=0.002092, acc=0.5416, codec_acc=0.3265, codec_topk_acc=0.7934, codec_loss=0.001195, text_loss=0.0008974, over 443282.60 frames. ], batch2025-04-2025-22025-04-24 10:44:20,963 INFO [train.py:611] Epoch 1, batch 2150, loss[loss=0.001828, acc=0.5594, codec_acc=0.3277, codec_topk_acc=0.8086, codec_loss=0.00106, text_loss=0.0007683, over 2438.00 frames. ], tot_loss[loss=0.002076, acc=0.5451, codec_acc=0.3267, codec_topk_acc=0.7935, codec_loss=0.001192, text_loss=0.0008846, over 444456.38 frames. ], batch size: 2025-04-2025-04-2025-04-242025-04-24 10:44:45,668 INFO [train.py:611] Epoch 1, batch 2200, loss[loss=0.001893, acc=0.59822025-2025-04-24 10:44:45,668 INFO [train.py:611] Epoch 1, batch 2200, loss[loss=0.00197, acc=0.5128, codec_acc=0.3417, codec_topk_acc=0.8041, codec_loss=0.001078, text_loss=0.0008912, over 2394.00 frames. ], tot_loss[loss=0.00209, acc=0.5395, codec_acc=0.32025-042025-04-2025-04-24 102025-04-24 10:45:13,879 INFO [train.py:611] Epoch 1, batch 2250, loss[loss=0.002069, acc=0.562, codec_acc=0.3277, codec_topk_acc=0.8111, codec_loss=0.001221, text_loss=0.0008483, over 2138.00 frames. ], tot_loss[loss=0.002077, acc=0.5501, codec_acc=0.3271, codec_topk_acc=0.7937, codec_loss=0.001195, text_loss=0.0008819, over 443056.35 frames. ], 2025-04-2025-04-24 2025-04-24 2025-04-24 10:45:41,600 INFO [train.py:611] Epoch 1, batch 2300, loss[loss=0.002627, acc=0.5054202025-04-24 10:45:41,600 INFO [train.py:611] Epoch 1, batch 2300, loss[loss=0.001914, acc=0.5192, codec_acc=0.3337, codec_topk_acc=0.8037, codec_loss=0.00109, text_loss=0.0008232, over 2379.00 frames. ], tot_loss[loss=0.002091, acc=0.5421, codec_acc=0.2025-042025-04-24 10:46:09,8942025-04-24 10:46:09,894 INFO [train.py:611] Epoch 1, batch 2350, loss[loss=0.002101, acc=0.4619, codec_acc=0.3387, codec_topk_acc=0.8098, codec_loss=0.00107, text_loss=0.001031, over 2402.00 frames. ], tot_loss[loss=0.002071, acc=0.5492, codec_acc=0.3286, codec_topk_acc=0.7958, codec_loss=0.00119, text_loss=0.0008812, over 442646.48 frames. ], b2025-04-22025-04-24 10:46:35,027 INFO [train.py:611] Epoch 1, batch 2400, loss[loss=0.00174, acc=0.6172, codec_acc=0.352025-04-24 10:46:35,028 INFO [train.py:611] Epoch 1, batch 2400, loss[loss=0.001867, acc=0.5458, codec_acc=0.3373, codec_topk_acc=0.8302, codec_loss=0.001076, text_loss=0.0007909, over 2300.00 frames. ], tot_loss[loss=0.002072, acc=0.5465, codec_acc=0.3306, 2025-04-2420252025-2025-04-24 10:47:01,239 INFO [train.py:611] Epoch 1, batch 2450, loss[loss=0.001812, acc=0.709, codec_acc=0.3265, codec_topk_acc=0.7798, codec_loss=0.001269, text_loss=0.0005433, over 2140.00 frames. ], tot_loss[loss=0.002051, acc=0.5497, codec_acc=0.3322, codec_topk_acc=0.799, codec_loss=0.001175, text_loss=0.0008762, over 444987.91 frames. ], batch size: 2025-04-2025-2025-04-24 10:47:25,801 INFO [train.py:611] Epoch 1, batch 2500, loss[loss=0.002055, acc=0.4951, codec_acc=0.3312, codec_topk_acc=0.8049, codec_loss=0.001098, text_loss=0.0009569, over 2373.00 frames. ], tot_loss[loss=0.002062, acc=0.5421, codec_acc=0.3326, codec_topk_acc=0.8003, codec_loss=0.001173, text_loss=0.0008898, over 444494.30 frames. ], batch size: 8, l2025-02025-04-2025-04-24 102025-04-24 10:47:50,561 INFO [train.py:611] Epoch 1, batch 2550, loss[loss=0.00283, acc=0.4627, codec_acc=0.3624, codec_topk_acc=0.8253, codec_loss=0.001479, text_loss=0.00135, over 1694.00 frames. ], tot_loss[loss=0.002068, acc=0.5428, codec_acc=0.3325, codec_topk_acc=0.8006, codec_loss=0.001174, text_loss=0.0008943, over 443838.99 frames. ], batch2025-2025-04-22025-04-24 10:48:15,791 INFO [train.py:611] Epoch 1, batch 2600, loss[loss=0.002273, acc=0.5212, codec_acc=0.3194, codec_topk_acc=0.7944, codec_loss=0.001263, text_loss=0.001011, over 2063.00 frames. ], tot_loss[loss=0.00206, acc=0.5434, codec_acc=0.3339, codec_topk_acc=0.8014, codec_loss=0.001171, text_loss=0.0008884, over 443973.92 frames. ], batch size: 5, l2025-042025-0422025-04-24 10:48:41,173 INFO [train.py:611] Epoch 1, batch 2650, loss[loss=0.001867, acc=0.512, codec_acc=0.3542, codec_topk_acc=0.809, codec_loss=0.001033, text_loss=0.000834, over 2450.00 frames. ], tot_loss[loss=0.002056, acc=0.5424, codec_acc=0.3346, codec_topk_acc=0.8026, codec_loss=0.001166, text_loss=0.0008903, over 444762.07 frames. ], batch size: 11, lr:20252025-04-242025-04-24 2025-04-24 10:49:06,673 INFO [train.py:611] Epoch 1, batch 2700, loss[loss=0.002147, acc=0.4286, codec2025-04-24 10:49:06,673 INFO [train.py:611] Epoch 1, batch 2700, loss[loss=0.002058, acc=0.5452, codec_acc=0.3372, codec_topk_acc=0.8008, codec_loss=0.001145, text_loss=0.000913, over 2272.00 frames. ], tot_loss[loss=0.002058, acc=0.5407, codec_acc=2025-02025-04-22025-04-24 10:49:34,480 INFO [train.py:611] Epoch 1, batch 2750, loss[loss=0.001886, acc=0.5605, codec_acc=0.3393, codec_topk_acc=0.8094, codec_loss=0.001041, text_loss=0.0008453, over 2454.00 frames. ], tot_loss[loss=0.002051, acc=0.5433, codec_acc=0.3351, codec_topk_acc=0.8039, codec_loss=0.001163, text_loss=0.0008879, over 444689.15 frames. ], batch size: 520252025-04-242025-04-24 12025-04-24 10:50:04,460 INFO [train.py:611] Epoch 1, batch 2800, loss[loss=0.002158, acc=0.5125, codec_acc=0.3334, codec_topk_acc=0.802, codec_loss=0.001215, text_loss=0.0009426, over 2134.00 frames. ], tot_loss[loss=0.002048, acc=0.545, codec_acc=0.3357, codec_topk_acc=0.8042, codec_loss=0.001166, text_loss=0.000882, over 443269.98 frames. ], batch 2025-2025-04-242025-04-24 10:50:34,568 INFO [train.py:611] Epoch 1, batch 2850, loss[loss=0.001694, acc=0.7531, codec_acc=0.3214, codec_topk_acc=0.793, codec_loss=0.001217, text_loss=0.0004764, over 2162.00 frames. ], tot_loss[loss=0.002054, acc=0.5442, codec_acc=0.3364, codec_topk_acc=0.8049, codec_loss=0.001162, text_loss=0.0008919, over 443785.01 frames. ], batch size: 5, l2022025-04-24 2025-04-242025-04-24 10:51:03,802 INFO [train.py:611] Epoch 1, batch 2900, loss[loss=0.002085, acc=0.4574, codec_a2025-04-24 10:51:03,802 INFO [train.py:611] Epoch 1, batch 2900, loss[loss=0.001944, acc=0.5415, codec_acc=0.3375, codec_topk_acc=0.8019, codec_loss=0.001117, text_loss=0.000826, over 2299.00 frames. ], tot_loss[loss=0.002056, acc=0.5383, codec_acc=02022025-04-24 2025-04-24 10:51:32,044 INFO [train.py:611] Epoch 1, batch 2950, loss[loss=0.00246, acc=0.5062, codec_acc=02025-02025-04-24 10:51:32,045 INFO [train.py:611] Epoch 1, batch 2950, loss[loss=0.00273, acc=0.3716, codec_acc=0.3395, codec_topk_acc=0.8141, codec_loss=0.001304, text_loss=0.001426, over 1950.00 frames. ], tot_loss[loss=0.002066, acc=0.5384, codec_acc=0.3322025-04-24 10:52:042025-04-24 10:52:04,837 INFO [train.py:611] Epoch 1, batch 3000, loss[loss=0.00206, acc=0.575, codec_acc=0.3266, codec_topk_acc=0.7924, codec_loss=0.00119, text_loss=0.0008703, over 2211.00 frames. ], tot_loss[loss=0.002057, acc=0.5421, codec_acc=0.3373, codec_topk_acc=0.8068, codec_loss=0.001158, text_loss=0.0008987, over 443476.17 frames. ], batch size: 22025-04-24 10:52:31,277 INFO [train.py:611] Epoch 1, batch 3050, loss[loss=0.00189, acc=0.5678, codec_acc=0.327, codec_topk_acc=0.8097, codec_loss=0.001111, text_loss=0.0007789, over 2310.00 frames. ], tot_loss[loss=0.00207, acc=0.5419, codec_acc=0.3374, codec_topk_acc=0.8065, codec_loss=0.001163, text_loss=0.0009065, over 441879.43 frames. ], batch size: 5, lr: 1.00e-04, 2025-04-24202025-04-24 10:52:56,048 INFO [train.py:611] Epoch 1, batch 3100, loss[loss=0.00206, acc=0.5554, codec_acc=0.3426, codec_topk_acc=0.8151, codec_loss=0.001137, text_loss=0.0009233, over 2227.00 frames. ], tot_loss[loss=0.002057, acc=0.54, codec_acc=0.3372, codec_topk_acc=0.8071, codec_loss=0.001154, text_loss=0.0009023, over 445004.06 frames. ], batch size: 10, lr:2025-04-24 12025-04-24 10:53:20,795 INFO [train.py:611] Epoch 1, batch 3150, loss[loss=0.002015, acc=0.5202, codec_acc=0.3492, codec_topk_acc=0.8232, codec_loss=0.001082, text_loss=0.0009338, over 2304.00 frames. ], tot_loss[loss=0.002054, acc=0.5434, codec_acc=0.3371, codec_topk_acc=0.807, codec_loss=0.001156, text_loss=0.0008982, over 444301.10 frames. ], batch size: 11, lr: 2025-04-24 10:53:44,996 INFO [train.py:611] Epoch 1, batch 3200, loss[loss=0.001923, acc=0.5577, codec_acc=0.3307, codec_topk_acc=0.7996, codec_loss=0.001121, text_loss=0.000802, over 2328.00 frames. ], tot_loss[loss=0.002052, acc=0.544, codec_acc=0.3383, codec_topk_acc=0.8078, codec_loss=0.001155, text_loss=0.0008963, over 443682.55 frames. ], batch size: 6, lr: 1.00e-04, 2025-04-22025-04-24 10:54:10,379 INFO [train.py:611] Epoch 1, batch 3250, loss[loss=0.001955, acc=0.5381, codec_acc=0.3387, codec_topk_acc=0.8127, codec_loss=0.001131, text_loss=0.0008237, over 2260.00 frames. ], tot_loss[loss=0.00204, acc=0.5432, codec_acc=0.3385, codec_topk_acc=0.8089, codec_loss=0.00115, text_loss=0.0008897, over 444904.80 frames. ], batch size: 7, lr: 12025-04-242025-2025-04-24 10:54:34,530 INFO [train.py:611] Epoch 1, batch 3300, loss[loss=0.002101, acc=0.4892, codec_acc=0.3489, codec_topk_acc=0.8126, codec_loss=0.001086, text_loss=0.001016, over 2325.00 frames. ], tot_loss[loss=0.002023, acc=0.5533, codec_acc=0.3393, codec_topk_acc=0.809, codec_loss=0.001153, text_loss=0.0008694, over 443280.96 frames. ], batch size: 12, l2025-04-24 10:55:05,299 INFO [train.py:611] Epoch 1, batch 3350, loss[loss=0.002181, acc=0.4785, codec_acc=0.3495, codec_topk_acc=0.8266, codec_loss=0.001078, text_loss=0.001103, over 2285.00 frames. ], tot_loss[loss=0.002045, acc=0.5448, codec_acc=0.3405, codec_topk_acc=0.81, codec_loss=0.001153, text_loss=0.0008921, over 442171.78 frames. ], batch size: 11, lr: 1.00e-04, 2025-04-2025-02025-04-24 10:55:30,802 INFO [train.py:611] Epoch 1, batch 3400, loss[loss=0.002503, acc=0.6345, codec_acc=0.3331, codec_topk_acc=0.8108, codec_loss=0.001529, text_loss=0.0009739, over 1682.00 frames. ], tot_loss[loss=0.002046, acc=0.5467, codec_acc=0.3404, codec_topk_acc=0.8108, codec_loss=0.001155, text_loss=0.0008919, over 440768.63 frames. ], batch size: 2,2025-042025-04-22025-04-24 10:55:59,785 INFO [train.py:611] Epoch 1, batch 3450, loss[loss=0.002008, acc=0.5133, codec_acc=0.3418, codec_topk_acc=0.8126, codec_loss=0.001077, text_loss=0.0009303, over 2344.00 frames. ], tot_loss[loss=0.002036, acc=0.5488, codec_acc=0.3406, codec_topk_acc=0.8111, codec_loss=0.001151, text_loss=0.0008857, over 441827.98 frames. ], batch size: 12025-04-24 10:56:24,358 INFO [train.py:611] Epoch 1, batch 3500, loss[loss=0.002372, acc=0.6102, codec_acc=0.3173, codec_topk_acc=0.8099, codec_loss=0.001402, text_loss=0.0009704, over 1832.00 frames. ], tot_loss[loss=0.002033, acc=0.5516, codec_acc=0.3413, codec_topk_acc=0.8116, codec_loss=0.001153, text_loss=0.0008799, over 440736.61 frames. ], batch size: 3, lr: 1.00e-04, 2025-04-24 10:56:48,982 INFO [train.py:611] Epoch 1, batch 3550, loss[loss=0.002072, acc=0.5299, codec_acc=0.3409, codec_topk_acc=0.8146, codec_loss=0.001148, text_loss=0.000924, over 2203.00 frames. ], tot_loss[loss=0.00202, acc=0.5562, codec_acc=0.3418, codec_topk_acc=0.8123, codec_loss=0.001151, text_loss=0.0008695, over 440793.84 frames. ], batch size: 8, lr: 1.00e-04, 2025-04-22025-04-24 102025-04-24 10:57:21,964 INFO [train.py:611] Epoch 1, batch 3600, loss[loss=0.001718, acc=0.7027, codec_acc=0.3495, codec_topk_acc=0.825, codec_loss=0.001159, text_loss=0.0005595, over 2151.00 frames. ], tot_loss[loss=0.002022, acc=0.5467, codec_acc=0.3422, codec_topk_acc=0.8129, codec_loss=0.001141, text_loss=0.0008808, over 444117.07 frames. ], batch siz2025-04-2025-04-24 102025-04-24 10:57:50,357 INFO [train.py:611] Epoch 1, batch 3650, loss[loss=0.001849, acc=0.52025-04-24 10:57:50,357 INFO [train.py:611] Epoch 1, batch 3650, loss[loss=0.00169, acc=0.6378, codec_acc=0.3585, codec_topk_acc=0.8302, codec_loss=0.001018, text_loss=0.0006723, over 2399.00 frames. ], tot_loss[loss=0.002029, acc=0.5472, codec_acc=0.3409, codec_to2025-042025-04-24 10:58:18,643 INFO [train.py:611] Epoch 1, batch 3700, loss[loss=0.001994, acc=0.4916, codec_acc2025-04-24 10:58:18,643 INFO [train.py:611] Epoch 1, batch 3700, loss[loss=0.002007, acc=0.5112, codec_acc=0.3427, codec_topk_acc=0.8142, codec_loss=0.001114, text_loss=0.0008934, over 2273.00 frames. ], tot_loss[loss=0.002028, acc=0.5451, codec_acc=0.3423, codec_2025-04-22025-04-24 10:58:43,214 INFO [train.py:611] Epoch 1, batch 3750, loss[loss=0.002629, acc=0.5217, codec_acc=0.3583, codec_topk_acc=0.8318, codec_loss=0.001453, text_loss=0.001177, over 1665.00 frames. ], tot_loss[loss=0.002031, acc=0.551, codec_acc=0.3434, codec_topk_acc=0.8141, codec_loss=0.00115, text_loss=0.0008811, over 439618.57 frames. ], batch size: 2, lr: 1.002025-04-24 102025-04-24 10:59:11,830 INFO [train.py:611] Epoch 1, batch 3800, loss[loss=0.001939, acc=0.5315, codec_acc=0.3518, codec_topk_acc=0.8121, codec_loss=0.001066, text_loss=0.0008733, over 2371.00 frames. ], tot_loss[loss=0.002012, acc=0.5497, codec_acc=0.3435, codec_topk_acc=0.8147, codec_loss=0.001135, text_loss=0.0008767, over 444509.02 frames. ], batch size: 8, l2025-042025-04-24 10:59:35,766 INFO [train.py:611] Epoch 1, batch 3850, loss[loss=0.00213, acc=0.5392, codec_acc=0.3326, codec_topk_acc=0.8082, codec_loss=0.00117, text_loss=0.0009597, over 2208.00 frames. ], tot_loss[loss=0.002023, acc=0.5505, codec_acc=0.3441, codec_topk_acc=0.8153, codec_loss=0.001145, text_loss=0.0008774, over 440366.61 frames. ], batch size: 10, lr: 12025-04-242025-04-22025-04-24 11:00:00,873 INFO [train.py:611] Epoch 1, batch 3900, loss[loss=0.002015, acc=0.4984, codec_acc=0.3376, codec_topk_acc=0.8178, codec_loss=0.001061, text_loss=0.0009542, over 2378.00 frames. ], tot_loss[loss=0.002003, acc=0.5513, codec_acc=0.3447, codec_topk_acc=0.816, codec_loss=0.00113, text_loss=0.0008732, over 445392.65 frames. ], batch size: 82025-04-24 11:00:24,789 INFO [train.py:611] Epoch 1, batch 3950, loss[loss=0.002053, acc=0.485, codec_acc=0.3306, codec_topk_acc=0.812025-04-24 11:00:24,790 INFO [train.py:611] Epoch 1, batch 3950, loss[loss=0.00206, acc=0.503, codec_acc=0.3501, codec_topk_acc=0.8274, codec_loss=0.001051, text_loss=0.001009, over 2353.00 frames. ], tot_loss[loss=0.001989, acc=0.5521, codec_acc2025-2025-04-24 2025-04-24 11:00:50,158 INFO [train.py:611] Epoch 1, batch 4000, loss[loss=0.001896, acc=0.5833, codec_acc=0.3655, codec_topk_acc=0.8349, codec_loss=0.001136, text_loss=0.0007597, over 2116.00 frames. ], tot_loss[loss=0.001999, acc=0.5495, codec_acc=0.3452, codec_topk_acc=0.817, codec_loss=0.001127, text_loss=0.0008723, over 445406.72 frames. ], batch siz2025-04-2025-04-24 11:01:14,999 INFO [train.py:611] Epoch 1, batch 4050, loss[loss=0.002735, acc=0.5324, codec_acc=0.3509, codec_topk_acc=0.8231, codec_loss=0.001417, text_loss=0.001318, over 1749.00 frames. ], tot_loss[loss=0.002027, acc=0.5494, codec_acc=0.3454, codec_topk_acc=0.8172, codec_loss=0.001141, text_loss=0.0008869, over 439810.41 frames. ], batch size: 16, lr2025-04-24 112025-04-24 11:01:40,739 INFO [train.py:611] Epoch 1, batch 4100, loss[loss=0.00185, acc=0.5637, codec_acc=0.353, codec_topk_ac2025-04-24 11:01:40,739 INFO [train.py:611] Epoch 1, batch 4100, loss[loss=0.00178, acc=0.558, codec_acc=0.3432, codec_topk_acc=0.8151, codec_loss=0.001039, text_loss=0.0007412, over 2401.00 frames. ], tot_loss[loss=0.002004, acc=0.5495, c2025-04-24 11:02:09,095 INFO [train.py:611] Epoch 1, batch 4150, loss[loss=0.002122, acc=0.491, codec_acc=0.3354, codec_topk_acc=0.8136, codec_loss=0.001096, text_loss=0.001026, over 2326.00 frames. ], tot_loss[loss=0.002003, acc=0.5544, codec_acc=0.3443, codec_topk_acc=0.8166, codec_loss=0.001134, text_loss=0.0008694, over 443176.76 frames. ], batch size: 6, lr: 1.00e-04, 2025-04-24 11:02:42,532 INFO2025-04-24 11:02:42,533 INFO [train.py:611] Epoch 1, batch 4200, loss[loss=0.001965, acc=0.4894, codec_acc=0.3469, codec_topk_acc=0.8224, codec_loss=0.001042, text_loss=0.0009225, over 2371.00 frames. ], tot_loss[loss=0.001995, acc=0.5492, codec_acc=0.3458, codec_topk_acc=0.8177, codec_loss=0.001124, text_loss=0.000871, over 445673.91 frames. ],2025-04-24 11:03:07,096 I2025-02025-04-24 11:03:07,097 INFO [train.py:611] Epoch 1, batch 4250, loss[loss=0.001888, acc=0.5291, codec_acc=0.36, codec_topk_acc=0.8225, codec_loss=0.001064, text_loss=0.0008246, over 2293.00 frames. ], tot_loss[loss=0.001998, acc=0.5494, codec_acc=0.3466, codec_topk_acc=0.8184, codec_loss=0.001124, text_loss=0.0008733, over 444825.67 frames. ], 2025-04-24 11:03:32,026 INFO [2025-04-24 11:03:32,026 INFO [train.py:611] Epoch 1, batch 4300, loss[loss=0.002157, acc=0.4691, c2025-04-24 11:03:32,026 INFO [train.py:611] Epoch 1, batch 4300, loss[loss=0.001969, acc=0.5369, codec_acc=0.3365, codec_topk_acc=0.8119, codec_loss=0.001143, text_loss=0.000826, over 2215.00 frames. ], tot_loss[loss=0.001983, acc=0.5534, codec_a2025-04-24 11:03:57,187 INFO [train.py:611] Epoch 1, batch 4350, loss[loss=0.001888, acc=0.5875, codec_acc=0.3502, codec_topk_acc=0.8148, codec_loss=0.001132, text_loss=0.0007561, over 2210.00 frames. ], tot_loss[loss=0.002015, acc=0.5451, codec_acc=0.3466, codec_topk_acc=0.8187, codec_loss=0.001126, text_loss=0.0008892, over 444122.44 frames. ], batch size: 6, lr: 1.00e-04, 2025-04-24 11:04:30,319 INFO [train.py:612025-04-24 11:04:30,320 INFO [train.py:611] Epoch 1, batch 4400, loss[loss=0.001724, acc=0.6011, codec_acc=0.3575, codec_topk_acc=0.8355, codec_loss=0.001013, text_loss=0.0007104, over 2369.00 frames. ], tot_loss[loss=0.002, acc=0.5468, codec_acc=0.3488, codec_topk_acc=0.8208, codec_loss=0.001121, text_loss=0.0008793, over 443823.53 f2025-04-24 11:05:2025-04-24 2025-04-24 12025-04-24 11:05:00,103 INFO [train.py:611] Epoch 1, batch 4450, loss[loss=0.001628, acc=0.6667, codec_acc=0.332, codec_topk_acc=0.8003, codec_loss=0.001098, text_loss=0.0005301, over 2362.00 frames. ], tot_loss[loss=0.001988, acc=0.5503, codec_acc=0.3485, codec_topk_acc=0.8203, codec_loss=0.001118, text_loss=0.0008698, over 445152025-04-24 11:05:33,588 INFO [train.py:611] Ep2025-04-24 11:05:33,588 INFO [train.py:611] Epoch 1, batch 4500, loss[loss=0.001813, acc=0.6042, codec_acc=0.3647, codec_topk_acc=0.8277, codec_loss=0.001056, text_loss=0.0007569, over 2281.00 frames. ], tot_loss[loss=0.001978, acc=0.5537, codec_acc=0.349, codec_topk_acc=0.8209, codec_loss=0.001116, text_loss=0.0008612, over 445122025-04-24 11:05:58,278 INFO [train.py:611] Epoch 1, batch 4550, loss[loss=0.001817, acc=0.5292, codec_acc=0.3591, codec_topk_acc=0.824, c2025-04-24 11:05:58,278 INFO [train.py:611] Epoch 1, batch 4550, loss[loss=0.001826, acc=0.5429, codec_acc=0.3483, codec_topk_acc=0.8215, codec_loss=0.001012, text_loss=0.000814, over 2451.00 frames. ], tot_loss[loss=0.001975, acc=0.547, cod2025-04-24 11:06:23,097 INFO [2025-04-24 11:06:23,098 INFO [train.py:611] Epoch 1, batch 4600, loss[loss=0.002069, acc=0.5048, codec_acc=0.3543, codec_topk_acc=0.8242, codec_loss=0.00106, text_loss=0.001008, over 2331.00 frames. ], tot_loss[loss=0.001986, acc=0.5497, codec_acc=0.35, codec_topk_acc=0.8232, codec_loss=0.001114, text_loss=0.0008718, over 443863.86 frames. ], batc2025-04-24 11:06:48,368 INFO [train.py:611] Epoch 1, batch 4650, loss[loss=0.001853, acc=0.5891, codec_acc=0.3599, codec_topk_acc=0.8301, codec_loss=0.001032, text_loss=0.0008211, over 2349.00 frames. ], tot_loss[loss=0.001987, acc=0.5485, codec_acc=0.3505, codec_topk_acc=0.8221, codec_loss=0.001113, text_loss=0.000874, over 444860.34 frames. ], batch size: 10, lr: 1.00e-04, 2025-04-24 11:07:13,421 INFO [train.py:611] Epoch 1, batch 4700, loss[loss=0.001864, acc=0.6338, codec_acc=0.3634, codec_topk_acc=0.8444, codec_loss=02025-04-24 11:07:13,421 INFO [train.py:611] Epoch 1, batch 4700, loss[loss=0.001798, acc=0.6061, codec_acc=0.3017, codec_topk_acc=0.8182, codec_loss=0.001143, text_loss=0.0006549, over 2269.00 frames. ], tot_loss[loss=0.001983, a2025-04-24 11:07:43,216 INFO [train.py:611] Epoch 1, batch 4750, loss[loss=0.001993, acc=0.5218, codec_acc=0.3514, codec_topk_acc=0.8263, codec_loss=0.001049, text_loss=0.0009437, over 2334.00 frames. ], tot_loss[loss=0.001994, acc=0.5475, codec_acc=0.3518, codec_topk_acc=0.8238, codec_loss=0.001114, text_loss=0.0008795, over 442983.42 frames. ], batch size: 13, lr: 1.00e-04, 2025-04-24 11:08:08,2025-04-24 11:08:08,126 INFO [train.py:611] Epoch 1, batch 4800, loss[loss=0.00206, acc=0.6071, codec_acc=0.3341, codec_topk_acc=0.8088, codec_loss=0.001194, text_loss=0.000866, over 2136.00 frames. ], tot_loss[loss=0.00199, acc=0.55, codec_acc=0.3505, codec_topk_acc=0.8241, codec_loss=0.001116, text_loss=0.0008743, over 442410.38 frames. ], batch size: 62025-04-24 11:08:2025-04-24 11:08:33,52025-04-24 11:08:33,579 INFO [train.py:611] Epoch 1, batch 4850, loss[loss=0.002114, acc=0.4799, codec_acc=0.3538, codec_topk_acc=0.827, codec_loss=0.001068, text_loss=0.001045, over 2305.00 frames. ], tot_loss[loss=0.001998, acc=0.5479, codec_acc=0.3516, codec_topk_acc=0.8238, codec_loss=0.001117, text_loss=0.0008817, over 441681.13 frame2025-04-24 11:08:58,462 INFO [train.py:611] Epoch 1, batch 4900, loss[loss=0.002033, acc=0.5495, codec_acc=0.3376, codec_topk_acc=0.8182, codec_loss=0.001155, text_loss=0.000878, over 2187.00 frames. ], tot_loss[loss=0.001983, acc=0.5548, codec_acc=0.3512, codec_topk_acc=0.8242, codec_loss=0.001117, text_loss=0.0008655, over 441525.28 frames. ], batch size: 6, lr: 1.00e-04, 2025-04-24 112025-04-24 11:09:25,52025-04-24 11:09:25,579 INFO [train.py:611] Epoch 1, batch 4950, loss[loss=0.002266, acc=0.5233, codec_acc=0.3642, codec_topk_acc=0.8374, codec_loss=0.001248, text_loss=0.001018, over 1900.00 frames. ], tot_loss[loss=0.002003, acc=0.5472, codec_acc=0.3515, codec_topk_acc=0.824, codec_loss=0.001118, text_loss=0.0008851, over 441084.61 frames2025-04-24 11:10:2025-04-24 11:10:01,970 INFO [train.py:539] Computing2025-04-24 11:10:05,688 INFO [train.py:548] Epoch 1, validation: loss=0.002071, acc=0.5651, codec_acc=0.3523, codec_topk_acc=0.8245, codec_loss=0.001175, text_loss=0.0008958, over 285507.00 frames. 2025-04-24 11:10:05,688 INFO [train.py:549] Maximum memory allocated so far is 47289MB 2025-04-24 11:10:12,969 INFO [train.py:611] Epoch 1, batch 5000, loss[loss=0.002073, acc=0.5248, codec_acc=0.3501, codec_topk_acc=0.818, codec_loss=0.001133, text_loss=0.0009399, over 2214.00 frames. ], tot_loss[loss=0.001981, acc=0.5541, codec_acc=0.3523, codec_topk_acc=0.8253, codec_loss=0.001115, text_loss=0.0008658, over 441426.28 frames. ], batch size: 8, lr: 1.00e-04, 2025-04-24 11:10:41,447 INFO [train.py:611] Epoch 1, batch 5050, loss[loss=0.001689, acc=0.6379, codec_acc=0.3546, codec_topk_acc=0.8255, codec_loss=0.001069, text_loss=0.0006204, over 2267.00 frames. ], tot_loss[loss=0.00197, acc=0.5587, codec_acc=0.3528, codec_topk_acc=0.825, codec_loss=0.001115, text_loss=0.0008549, over 441383.25 frames. ], batch size: 4, lr: 1.00e-04, 2025-04-24 11:11:16,214 INFO [train.py2025-04-24 11:11:16,215 INFO [train.py:611] Epoch 1, batch 5100, loss[loss=0.00188, acc=0.6168, codec_acc=0.3532, codec_topk_acc=0.8365, codec_loss=0.001154, text_loss=0.0007252, over 2117.00 frames. ], tot_loss[loss=0.002004, acc=0.5479, codec_acc=0.3521, codec_topk_acc=0.8244, codec_loss=0.001119, text_loss=0.0008852, over 440284.66 fr2025-04-24 11:12025-04-24 11:11:42,247 INFO [train.py:611] Epoch 1, batch 5150, loss[loss=0.002073, acc=0.4524, codec_acc=0.374, codec_topk_acc=0.8359, codec_loss=0.001008, text_loss=0.001066, over 2343.00 frames. ], tot_loss[loss=0.002, acc=0.5454, codec_acc=0.3527, codec_topk2025-04-24 11:11:42,247 INFO [train.py:611] Epoch 1, batch 5150, loss[loss=0.001997, acc=0.503, codec2025-04-24 11:12:11,611 INFO [train.2025-04-24 11:12:11,611 INFO [train.py:611] Epoch 1, batch 5200, loss[loss=0.002013, acc=0.5075, codec_acc=0.3768, codec_topk_acc=0.8499, codec_loss=0.001053, text_loss=0.0009594, over 2206.00 frames. ], tot_loss[loss=0.001991, acc=0.546, co2025-04-24 11:12:11,611 INFO [train.py:611] Epoch 1, batch 5200, loss[loss=0.002122, acc=0.4759, codec2025-04-24 11:12:36,474 INFO [train.py:611] Epoch 1, batch 5250, loss[loss=0.002068, acc=0.5906, codec_acc=0.3434, codec_topk_acc=0.8294, codec_loss=0.001188, text_loss=0.0008799, over 2064.00 frames. ], tot_loss[loss=0.001984, acc=0.5547, codec_acc=0.3533, codec_topk_acc=0.82025-04-24 11:12:36,474 INFO [train.py:611] Epoch 1, batch 5250, loss[loss=0.00186, acc=0.6929, codec_2025-04-22025-04-24 11:13:02,114 I2025-04-24 11:13:02,115 INFO [train.py:611] Epoch 1, batch 5300, loss[loss=0.002011, acc=0.4776, codec_acc=0.3608, codec_topk_acc=0.8416, codec_loss=0.001063, text_loss=0.0009479, over 2226.00 frames. ], tot_loss[loss=0.001988, acc=0.5521, codec_acc=0.3536, codec_topk_acc=0.8269, codec_loss=0.001116, text_loss=0.0008718, over 439206.48 frames2025-04-24 11:13:27,305 INFO [train.py:611] Epoch 1, batch 5350, loss[loss=0.002559, acc=0.4795, codec_acc=0.3451, codec_topk_acc=0.8229, codec_loss=0.001289, text_loss=0.00127, over 1911.00 frames. ], tot_loss[loss=0.001991, acc=0.5514, codec_acc=0.3536, codec_topk_acc=0.8264, codec_loss=0.001114, text_loss=0.0008777, over 440570.07 frames. ], batch size: 2, lr: 1.00e-04, 2025-04-24 11:13:54,558 INFO [train.py:611] Epoch 1, batch 5400, loss[loss=0.00203, acc=0.483, codec_acc=0.3564, codec_topk_acc=0.8328, codec_loss=0.001071, text_loss=0.0009594, over 2251.00 frames. ], tot_loss[loss=0.001994, acc=0.5482, codec_acc=0.355, codec_topk_acc=0.8275, codec_loss=0.001109, text_loss=0.0008851, over 441219.51 frames. ], batch size: 14, lr: 1.00e-04, 2025-04-24 112025-04-24 11:14:18,919 2025-04-24 11:14:18,920 INFO [train.py:611] Epoch 1, batch 5450, loss[loss=0.002025, acc=0.525, codec_acc=0.3613, codec_topk_acc=0.823, codec_loss=0.001085, text_loss=0.0009395, over 2253.00 frames. ], tot_loss[loss=0.0012025-04-24 11:14:18,920 INFO [train.py:611] Epoch 1, batch 5450, loss[loss=0.002013, acc=0.537, codec_acc=0.3482, codec_2025-04-24 11:14:44,630 INFO [train.py:611] Epoch 1, batch 5500, loss[loss=0.002129, acc=0.4903, codec_acc=0.359, codec_topk_acc=0.8245, codec_loss=0.001098, text_loss=0.001031, over 2232.00 frames. ], tot_loss[loss=0.002003, acc=0.5443, codec_acc=0.3561, codec_topk_acc=0.8285, codec_loss=0.001108, text_loss=0.0008952, over 440207.17 frames. ], batch size: 14, lr: 1.00e-04, 2025-04-24 11:12025-04-24 2025-04-24 11:15:09,165 INFO [train.py:611] Epoch 1, batch 5550, loss[loss=0.001887, acc=0.7286, codec_acc=0.3575, codec_topk_acc=0.8095, codec_loss=0.001278, text_loss=0.0006089, over 1963.00 frames. ], tot_loss[loss=0.001977, acc=0.5529, codec_acc=0.32025-04-24 11:15:09,165 INFO [train.py:611] Epoch 1, batch 5550, loss[loss=0.001703, acc=0.58732025-04-24 11:15:43,320 INFO [train.py:611] Epoch 1, batch 5600, loss[loss=0.001895, acc=0.5517, codec_acc=0.3567, codec_topk_acc=0.8351, codec_loss=0.001061, text_loss=0.0008342, over 2277.00 frames. ], tot_loss[loss=0.001993, acc=0.5492, codec_acc=0.356, codec_topk_acc=0.8287, codec_loss=0.001109, text_loss=0.0008845, over 439722.08 frames. ], batch size: 8, lr: 1.00e-04, 2025-04-24 11:16:02025-04-24 11:16:07,200 INFO [train.py:611] Epoch 1, batch 5650, loss[loss=0.001996, acc=0.5166, codec_acc=0.3552, codec_topk_acc=0.8287, codec_loss=0.001044, text_loss=0.0009518, over 2345.00 frames. ], tot_loss[loss=0.00197, acc=0.5524, codec_ac2025-04-24 11:16:07,201 INFO [train.py:611] Epoch 1, batch 5650, loss[loss=0.002069, acc=0.4604, codec_acc=0.35492025-04-24 11:16:2025-04-24 11:16:36,671 INFO [train.py:611] Epoch 1, batch 5700, loss[loss=0.002063, acc=0.5541, codec_acc=0.3536, codec_topk_acc=0.8252, codec_loss=0.001158, text_loss=0.000905, over 2133.00 frames. ], tot_loss[loss=0.001963, acc=0.554, codec_acc=0.3555, codec_topk_acc=0.8292, codec_loss=0.001097, text_loss=0.0008658, over 443851.57 frames. ], batch size: 6,2025-04-24 11:17:06,779 INFO [train.py:611] Epoch 1, batch 5750, loss[loss=0.001879, acc=0.541, codec_acc=0.3691, codec_topk_acc=0.8394, codec_loss=0.001004, text_loss=0.0008758, over 2354.00 frames. ], tot_loss[loss=0.001972, acc=0.5529, codec_acc=0.3568, codec_topk_acc=0.8302, codec_loss=0.001099, text_loss=0.0008727, over 441998.52 frames. ], batch size: 12, lr: 1.00e-04, 2025-04-24 11:12025-04-24 11:17:45,719 INFO [train.py:611] Epoch 1, batch 5800, loss[loss=0.002008, acc=0.4957, codec_acc=0.3693, codec_topk_acc=0.8411, codec_loss=0.00102, text_loss=0.0009881, over 2319.00 frames. ], tot_loss[loss=0.001969, acc=0.547, codec_acc=0.3566, codec_topk_acc=0.8307, codec_loss=0.001091, text_loss=0.0008771, over 444804.59 frames. ], batch size: 12, 2025-04-24 11:18:11,175 INFO [train.py:611] Epoch 1, batch 5850, loss[loss=0.002029, acc=0.4649, codec_acc=0.3616, codec_topk_acc=0.8369, codec_loss=0.001019, text_loss=0.00101, over 2325.00 frames. ], tot_loss[loss=0.001972, acc=0.55, codec_acc=0.3572, codec_topk_2025-04-24 11:18:11,175 INFO [train.py:611] Epoch 1, batch 5850, loss[loss=0.00208, acc=0.4566, codec_acc=0.3662025-04-24 11:18:32025-04-24 11:18:36,024 INFO [train.py:611] Epoch 1, batch 5900, loss[loss=0.002072, acc=0.5504, codec_acc=0.3555, codec_topk_acc=0.8293, codec_loss=0.001096, text_loss=0.0009762, over 2220.00 frames. ], tot_loss[loss=0.001964, acc=0.5496, codec_acc=0.3573, codec_topk_acc=0.8316, codec_loss=0.001091, text_loss=0.0008736, over 444060.82 frames. ], batch size2025-04-24 11:19:00,2025-04-24 11:19:00,852 INFO [train.py:611] Epoch 1, batch 5950, loss[loss=0.002098, acc=0.5036, codec_acc=0.3563, codec_topk_acc=0.8303, codec_loss=0.001099, text_loss=0.000999, over 2221.00 frames. ], tot_loss[loss=0.001971, acc=0.5472, codec_ac2025-04-24 11:192025-04-24 11:19:00,852 INFO [train.py:611] Epoch 1, batch 5950, loss[loss=0.001955, acc=0.5046,2025-04-24 11:19:26,510 INFO [train.py:611] Epoch 1, batch 6000, loss[loss=0.002029, acc=0.5326, codec_acc=0.358, codec_topk_acc=0.8298, codec_loss=0.001088, text_loss=0.0009411, over 2239.00 frames. ], tot_loss[loss=0.001966, acc=0.552, codec_acc=0.3576, codec_topk_acc=0.8322, codec_loss=0.001095, text_loss=0.0008706, over 441832.05 frames. ], batch size: 14, lr: 1.00e-04, 2025-04-24 11:19:2025-04-24 11:19:50,846 INFO [train.py:611] Epoch 1, batch 6050, loss[loss=0.001752, acc=0.6182, codec_acc=0.3796, codec_topk_acc=0.8503, codec_loss=0.001072, text_loss=0.0006799, over 2151.00 frames. ], tot_loss[loss=0.001952, acc=0.5517, codec_acc=0.3581, codec_topk2025-04-24 11:19:50,846 INFO [train.py:611] Epoch 1, batch 6050, loss[loss=0.00171, acc=0.72025-04-24 11:20:15,420 INFO [train.py:611] Epoch 1, batch 6100, loss[loss=0.002332, acc=0.4478, codec_acc=0.3447, codec_topk_acc=0.8234, codec_loss=0.001128, text_loss=0.001204, over 2218.00 frames. ], tot_loss[loss=0.00195, acc=0.5578, codec_acc=0.3583, codec_topk_acc=0.8324, codec_loss=0.001092, text_loss=0.0008576, over 442869.67 frames. ], batch size: 9, lr: 1.00e-04, 2025-04-24 11:20:40,722 INFO 2025-04-24 11:20:40,723 INFO [train.py:611] Epoch 1, batch 6150, loss[loss=0.002299, acc=0.5021, codec_acc=0.3599, codec_topk_acc=0.8401, codec_loss=0.001178, text_loss=0.001121, over 2015.00 frames. ], tot_loss[loss=0.001943, acc=0.5582, codec_acc=0.3596, codec_topk_acc=0.8328, codec_loss=0.001089, text_loss=0.0008545, over 443179.45 frames. ], ba2025-04-24 11:21:10,42025-04-24 11:21:10,446 INFO [train.py:611] Epoch 1, batch 6200, loss[loss=0.001804, acc=0.5944, codec_acc=0.3601, codec_topk_acc=0.8393, codec_loss=0.001044, text_loss=0.0007598, over 2290.00 frames. ], tot_loss[loss=0.001975, acc=0.5478, codec_acc=0.3599, codec_topk_acc=0.8328, codec_loss=0.001091, text_loss=0.0008838, over 441588.13 frames. ], batch si2025-04-24 11:21:36,243 INFO [train.py:611] Epoch 1, batch 6250, loss[loss=0.001675, acc=0.6542, codec_acc=0.3434, codec_topk_acc=0.8164, codec_loss=0.001055, text_loss=0.0006202, over 2384.00 frames. ], tot_loss[loss=0.001954, acc=0.5599, codec_acc=0.3584, codec_topk_acc=0.8326, codec_loss=0.001096, text_loss=0.0008587, over 441350.07 frames. ], batch size: 8, lr: 1.00e-04, 2025-04-24 11:22:02,368 INFO [train.py:611] Epoch 1, batch 6300, loss[loss=0.002557, acc=0.5097, codec_acc=0.3399, codec_topk_acc=0.8237, codec_loss=0.001321, text_loss=0.001235, over 1877.00 frames. ], tot_loss[loss=0.001961, acc=0.5561, codec_acc=0.3589, codec_topk_acc=0.8331, codec_loss=0.001094, text_loss=0.0008664, over 441199.10 frames. ], batch size: 2, lr: 1.00e-04, 2025-04-24 11:22:30,429 INFO [train.py:611] Epoch 1, batch 6350, loss[loss=0.001854, acc=0.5231, codec_acc=0.3796, codec_topk_acc=0.8521, codec_loss=0.0009865, text_loss=0.0008671, over 2328.00 frames. ], tot_loss[loss=0.001956, acc=0.5565, codec_acc=0.3596, codec_topk_acc=0.8343, codec_loss=0.001092, text_loss=0.0008634, over 440779.53 frames. ], batch size: 13, lr: 1.00e-04, 2025-04-24 11:22:59,574 INFO2025-04-24 11:22:59,574 INFO [train.py:611] Epoch 1, batch 6400, loss[loss=0.001598, acc=0.6711, codec_acc=0.3628, codec_topk_acc=0.8501, codec_loss=0.0009686, text_loss=0.0006295, over 2424.00 frames. ], tot_loss[loss=0.001956, acc=0.555, codec_acc=0.36,2025-04-24 11:22:59,575 INFO [train.py:611] Epoch 1, batch 6400, loss[loss=0.001745, acc=0.591,2025-04-24 11:23:29,130 INF2025-04-24 11:23:29,130 INFO [train.py:611] Epoch 1, batch 6450, loss[loss=0.001898, acc=0.5281, codec_acc=0.3757, codec_topk_acc=0.8416, codec_loss=0.001003, text_loss=0.000895, over 2320.00 frames. ], tot_loss[loss=0.00195, acc=0.5532, codec_acc=0.361, codec_topk_acc=0.8354, codec_loss=0.001084, text_loss=0.000866, over 442732.77 frames. ], batch si2025-04-24 11:23:58,109 INFO [train.py:611] Epoch 1, batch 6500, loss[loss=0.002345, acc=0.494, codec_acc=0.3621, codec_topk_acc=0.831, codec_loss=0.001177, text_loss=0.001169, over 2051.00 frames. ], tot_loss[loss=0.001933, acc=0.5584, codec_acc=0.3619, codec_topk_acc=0.8358, codec_loss=0.00108, text_loss=0.0008525, over 443599.33 frames. ], batch size: 16, lr: 1.00e-04, 2025-04-24 11:24:262025-04-24 11:24:26,327 INFO [train.py:611] Epoch 1, batch 6550, loss[loss=0.00269, acc=0.4286, codec_acc=0.3546, codec_topk_acc=0.8514, codec_loss=0.001474, text_loss=0.001216, over 1635.00 frames. ], tot_loss[loss=0.001942, acc=0.5511, codec_acc=0.3619, codec_topk_acc=0.8357, codec_loss=0.001078, text_loss=0.0008643, over 444605.26 frames. ], batch size: 22025-04-24 11:24:55,431 INFO [train.py:611] Epoch 1, batch 6600, loss[loss=0.001712, acc=0.6421, codec_acc=0.3649, codec_topk_acc=0.8343, codec_loss=0.001057, text_loss=0.0006559, over 2271.00 frames. ], tot_loss[loss=0.001942, acc=0.5575, codec_acc=0.3609, codec_topk_acc=0.8351, codec_loss=0.001085, text_loss=0.0008578, over 442734.08 frames. ], batch size: 7, lr: 1.00e-04, 2025-04-24 11:25:20,707 INFO [train.py:611] Epoch 1, batch 6650, loss[loss=0.00181, acc=0.5768, codec_acc=0.3701, codec_topk_acc=0.8363, codec_loss=0.00103, text_loss=0.0007806, over 2299.00 frames. ], tot_loss[loss=0.001952, acc=0.5539, codec_acc=0.3608, codec_topk_acc=0.8355, 2025-04-24 11:25:20,707 INFO [train.py:611] Epoch 1, batch 6650, loss[loss=0.001823, acc=0.5397, co2025-04-24 11:25:46,312 INFO [train.py:611] Epoch 1, batch 6700, loss[loss=0.001867, acc=0.549, codec_acc=0.371, codec_topk_acc=0.8521, codec_loss=0.0009818, text_loss=0.0008849, over 2333.00 frames. ], tot_loss[loss=0.001947, acc=0.5528, codec_acc=0.3613, codec_topk_acc=0.8363, codec_loss=0.001079, text_loss=0.0008676, over 443970.99 frames. ], batch size: 12, lr: 1.00e-04, 2025-04-24 11:26:11,948 INFO [train.py:611] Epoch 1, batch 6750, loss[loss=0.002092, acc=0.5126, codec_acc=0.3649, codec_topk_acc=0.8436, codec_loss=0.001052, text_loss=0.00104, over 2234.00 frames. ], tot_loss[loss=0.001947, acc=0.5517, codec_acc=0.3612, codec_topk_acc=0.8359, codec_loss=0.001079, text_loss=0.000868, over 444438.48 frames. ], batch size: 14, lr: 1.00e-04, 2025-04-24 11:26:37,451 I2025-04-24 11:26:37,451 INFO [train.py:611] Epoch 1, batch 6800, loss[loss=0.00189, acc=0.4852, codec_acc=0.3732, codec_topk_acc=0.844, codec_loss=0.0009686, text_loss=0.0009214, over 2414.00 frames. ], tot_loss[loss=0.001926, acc=0.5552, codec_acc=0.3629, codec_topk_acc=0.8367, codec_loss=0.001071, text_loss=0.0008544, over 445596.16 frames. ], batch s2025-04-24 11:27:01,8482025-04-24 11:27:01,849 INFO [train.py:611] Epoch 1, batch 6850, loss[loss=0.002015, acc=0.5316, codec_acc=0.3717, codec_topk_acc=0.8602, codec_loss=0.001057, text_loss=0.000958, over 2137.00 frames. ], tot_loss[loss=0.001926, acc=0.5551, codec_acc=0.3636, codec_topk_acc=0.8374, codec_loss=0.001072, text_loss=0.0008539, over 444791.48 frames. ], batch si2025-04-24 11:27:26,990 INFO [train.py:611] Epoch 1, batch 6900, loss[loss=0.001913, acc=0.6174, codec_acc=0.3732, codec_topk_acc=0.8479, codec_loss=0.001075, text_loss=0.0008379, over 2150.00 frames. ], tot_loss[loss=0.00193, acc=0.5538, codec_acc=0.3634, codec_topk_acc=0.8377, co2025-04-24 11:27:26,992 INFO [train.py:611] Epoch 1, batch 6900, loss[loss=0.001881, acc=0.5182,2025-04-24 11:27:55,876 INFO [train.py:611] Epoch 1, batch 6950, loss[loss=0.002318, acc=0.594, codec_acc=0.3487, codec_topk_acc=0.8236, codec_loss=0.001256, text_loss=0.001061, over 1957.00 frames. ], tot_loss[loss=0.001924, acc=0.5563, codec_acc=0.364, codec_top2025-04-24 11:27:55,876 INFO [train.py:611] Epoch 1, batch 6950, loss[loss=0.002486, acc=0.4404, codec_acc=0.32025-02025-04-24 11:28:20,254 INFO [train.py:611] Epoch 1, batch 7000, loss[loss=0.001967, acc=0.5289, codec_acc=0.3639, codec_topk_acc=0.8369, codec_loss=0.00105, text_loss=0.0009172, over 2279.00 frames. ], tot_loss[loss=0.001938, acc=0.5552, codec_acc=0.3642, codec_topk_acc=0.8373, 2025-04-24 11:28:20,255 INFO [train.py:611] Epoch 1, batch 7000, loss[loss=0.001945, acc=0.492025-04-24 11:28:44,844 INFO [train.py:611] Epoch 1, batch 7050, loss[loss=0.001989, acc=0.4798, codec_acc=0.3667, codec_topk_acc=0.8459, codec_loss=0.001006, text_loss=0.0009829, over 2329.00 frames. ], tot_loss[loss=0.001924, acc=0.5568, codec_acc=0.3637, codec_topk_acc=0.8381, codec_loss=0.001072, text_loss=0.0008525, over 444595.66 frames. ], batch size: 12, lr: 1.00e-04, 2025-04-24 11:29:28,708 INFO [train.py:611] Epoch 1, batch 7100, loss[loss=0.001833, acc=0.5419, codec_acc=0.3558, codec_topk_acc=0.8488, codec_loss=0.001012, text_loss=0.0008212, over 2316.00 frames. ], tot_loss[loss=0.001925, acc=0.5561, codec_acc=0.3643, codec_topk_acc=0.8388, codec_loss=0.00107, text_loss=0.0008553, over 444782.61 frames. ], batch size: 9, lr: 1.00e-04, 2025-04-24 11:29:52,6942025-04-24 11:29:52,694 INFO [train.py:611] Epoch 1, batch 7150, loss[loss=0.001856, acc=0.5389, codec_acc=0.3707, codec_topk_acc=0.8462, codec_loss=0.001003, text_loss=0.0008525, over 2326.00 frames. ], tot_loss[loss=0.001928, acc=0.5585, codec_ac2025-04-24 11:29:52,694 INFO [train.py:611] Epoch 1, batch 7150, loss[loss=0.002023, acc=0.4906, codec_ac2025-04-2025-04-24 11:30:26,651 INFO [train.py:611] Epoch 1, batch 7200, loss[loss=0.001894, acc=0.5224, codec_acc=0.3755, codec_topk_acc=0.8438, codec_loss=0.001015, text_loss=0.0008786, over 2297.00 frames. ], tot_loss[loss=0.00194, acc=0.554, codec_acc=0.3637, codec_topk_acc=0.8382, codec_loss=0.001076, text_loss=0.0008643, over 442811.87 frames. ], batch size: 9, lr: 1.002025-04-24 11:30:55,560 INFO [train.py:611] Epoch 1, batch 7250, loss[loss=0.001923, acc=0.5491, codec_acc=0.3405, codec_topk_acc=0.8251, codec_loss=0.001084, text_loss=0.000839, over 2295.00 frames. ], tot_loss[loss=0.001929, acc=0.556, codec_acc=0.3646, codec_topk_acc=0.8398, codec_loss=0.001071, text_loss=0.0008578, over 443538.93 frames. ], batch size: 4, lr: 1.00e-04, 20252025-04-24 11:31:20,626 INFO [train.py:611] Epoch 1, batch 7300, loss[loss=0.001547, acc=0.7978, codec_acc=0.3433, codec_topk_acc=0.8005, codec_loss=0.001125, text_loss=0.0004227, over 2286.00 frames. ], tot_loss[loss=0.001934, acc=0.5541, codec_acc=0.3648, codec_topk_acc=0.8388, codec_loss=0.001072, text_loss=0.0008627, over 443544.69 frames. ], batch size: 4, lr: 1.00e2025-02025-04-24 11:31:45,758 INFO [train.py:611] Epoch 1, batch 7350, loss[loss=0.002085, acc=0.4928, codec_acc=0.3565, codec_topk_acc=0.8301, codec_loss=0.001052, text_loss=0.001033, over 2302.00 frames. ], tot_loss[loss=0.001932, acc=0.5532, codec_acc=0.3653, codec_topk_acc=0.8396, codec_loss=0.001068, text_loss=0.0008639, over 444101.03 frames. ], batch size: 5, lr: 1.00e-2025-04-24 11:32:10,247 INFO [train.py:611] Epoch 1, batch 7400, loss[loss=0.001639, acc=0.6961, codec_acc=0.3775, codec_topk_acc=0.8544, codec_loss=0.001066, text_loss=0.0005721, over 2134.00 frames. ], tot_loss[loss=0.001929, acc=0.5524, codec_acc=0.3659, codec_topk_acc=0.8407, codec_loss=0.001064, text_loss=0.000865, over 445040.89 frames. ], batch size: 6, lr: 1.00e-04, 2025-2025-04-24 11:32:34,498 INFO [train.py:611] Epoch 1, batch 7450, loss[loss=0.001947, acc=0.6463, codec_acc=0.336, codec_topk_acc=0.8212, codec_loss=0.001285, text_loss=0.000662, over 1968.00 frames. ], tot_loss[loss=0.00191, acc=0.563, codec_acc=0.3649, codec_topk_acc=0.8398, codec_loss=0.001071, text_loss=0.0008395, over 443250.16 frames. ], batch size: 3, lr: 1.00e-042022025-04-24 11:33:00,397 INFO [train.p2025-04-24 11:33:00,398 INFO [train.py:611] Epoch 1, batch 7500, loss[loss=0.002483, acc=0.5145, codec_acc=0.369, codec_topk_acc=0.8398, codec_loss=0.001326, text_loss=0.001157, over 1780.00 frames. ], tot_loss[loss=0.001927, acc=0.5561, codec_acc=0.3657, codec_topk_acc=0.8399, codec_loss=0.001069, text_loss=0.0008583, over 443245.25 f20252025-04-24 11:33:31,205 INFO [train.py:611] Epoch 1, batch 7550, loss[loss=0.00201, acc=0.4907, codec_acc=0.3583, codec_topk_acc=0.8491, codec_loss=0.001004, text_loss=0.001006, over 2331.00 frames. ], tot_loss[loss=0.001919, acc=0.5583, codec_acc=0.3669, codec_topk_acc=0.2025-04-24 11:33:31,205 INFO [train.py:611] Epoch 1, batch 7550, loss[loss=0.001909, acc=0.5453, codec2022025-04-24 11:33:56,12025-04-24 11:33:2025-04-24 11:33:56,145 INFO [train.py:611] Epoch 1, batch 7600, loss[loss=0.001907, acc=0.491, codec_acc=0.3709, codec_topk_acc=0.8439, codec_loss=0.0009868, text_loss=0.00092, over 2371.00 frames. ], tot_loss[loss=0.001935, acc=0.55082025-04-24 11:33:56,146 INFO [train.py:611] Epoch 1, batch 7600, loss[loss=0.001741, acc=0.5709, codec2025-04-24 11:34:25,7822025-04-24 11:34:25,783 INFO [train.py:611] Epoch 1, batch 7650, loss[loss=0.002037, acc=0.6594, codec_acc=0.3484, codec_topk_acc=0.828, codec_loss=0.00132, text_loss=0.0007169, over 1840.00 frames. ], tot_loss[loss=0.001946, acc=0.5521, codec_acc=0.3669, codec_topk_acc=0.8417, codec_loss=0.001071, text_loss=0.0008756, over 440699.41 frames. ], batch s2022025-04-24 11:34:56,444 INFO [train.py:611] Epoch 1, batch 7700, loss[loss=0.002011, acc=0.5292, codec_acc=0.3692, codec_topk_acc=0.8473, codec_loss=0.001105, text_loss=0.0009056, over 2123.00 frames. ], tot_loss[loss=0.001962, acc=0.5443, codec_acc=0.3675, codec_topk_acc=0.8432, codec_loss=0.001069, text_loss=0.0008936, over 440334.89 frames. ], batch size: 6, lr: 1.00e-042022025-04-24 11:35:23,129 INFO [train.py:611] Epoch 1, batch 7750, loss[loss=0.001867, acc=0.5319, codec_acc=0.3818, codec_topk_acc=0.8625, codec_loss=0.0009837, text_loss=0.0008836, over 2282.00 frames. ], tot_loss[loss=0.001947, acc=0.5433, codec_acc=0.3687, codec_topk_acc=0.8441, codec_loss=0.001059, text_loss=0.0008887, over 443368.26 frames. ], batch size: 7, lr: 1.00e2025-04-24 11:35:48,2762025-04-24 11:35:48,276 INFO [train.py:611] Epoch 1, batch 7800, loss[loss=0.002033, acc=0.5439, codec_acc=0.344, codec_topk_acc=0.8308, codec_loss=0.001102, text_loss=0.0009311, over 2206.00 frames. ], tot_loss[loss=0.001934, acc=0.5501, codec_acc=0.3679, codec_topk_acc=0.8431, codec_loss=0.001061, text_loss=0.0008731, over 443113.25 frames. ], batc2025-04-2025-04-24 11:36:19,148 INFO [train.py:611] Epoch 1, batch 7850, loss[loss=0.001985, acc=0.6233, codec_acc=0.3769, codec_topk_acc=0.8497, codec_loss=0.001194, text_loss=0.0007904, over 1899.00 frames. ], tot_loss[loss=0.001932, acc=0.5477, codec_acc=0.37, codec_topk_acc=0.8444, codec_loss=0.001055, text_loss=0.000877, over 443771.66 frames. ], batch size: 2, lr: 1.002025-04-24 11:36:54,538 INF2025-04-24 11:36:54,538 INFO [train.py:611] Epoch 1, batch 7900, loss[loss=0.002041, acc=0.4947, codec_acc=0.383, codec_topk_acc=0.8442, codec_loss=0.001069, text_loss=0.0009713, over 2178.00 frames. ], tot_loss[loss=0.001934, acc=0.5467, codec_acc=0.3694, codec_topk_acc=0.8445, codec_loss=0.001054, text_loss=0.0008804, over 444548.64 frames. ], batc2025-02025-04-24 11:37:24,2025-04-24 11:37:24,240 INFO [train.py:611] Epoch 1, batch 7950, loss[loss=0.002368, acc=0.4959, codec_acc=0.3583, codec_topk_acc=0.8348, codec_loss=0.001248, text_loss=0.00112, over 1915.00 frames. ], tot_loss[loss=0.001937, acc=0.5472, codec_acc=0.3695, codec_topk_acc=0.8444, codec_loss=0.001055, text_loss=0.0008817, over 444214.38 frames. ], batch2025-04-24 11:37:48,811 IN2025-04-24 11:37:48,811 INFO [train.py:611] Epoch 1, batch 8000, loss[loss=0.002173, acc=0.5577, codec_acc=0.3711, codec_topk_acc=0.8252, codec_loss=0.001205, text_loss=0.0009681, over 2004.00 frames. ], tot_loss[loss=0.001947, acc=0.549, codec_acc=0.3699, codec_topk_acc=0.8442, codec_loss=0.001062, text_loss=0.0008847, over 440985.12 frames. ], batch2025-2025-04-24 11:38:18,2025-04-24 112025-04-24 11:38:18,929 INFO [train.py:611] Epoch 1, batch 8050, loss[loss=0.001745, acc=0.5402, codec_acc=0.3776, codec_topk_acc=0.8473, codec_loss=0.0009649, text_loss=0.0007799, over 2384.00 frames. ], tot_loss[loss=0.001925, acc=0.554, codec_acc=0.3705, codec_topk_acc=0.8454, codec_loss=0.001058, text_loss=0.000867, over 441628.61 fram20252025-04-24 11:38:44,786 INFO [train.py:611] Epoch 1, batch 8100, loss[loss=0.001923, acc=0.5081, codec_acc=0.3766, codec_topk_acc=0.8447, codec_loss=0.001004, text_loss=0.0009186, over 2299.00 frames. ], tot_loss[loss=0.001929, acc=0.5498, codec_acc=0.3717, codec_topk_acc=0.8468, codec_loss=0.00105, text_loss=0.0008793, over 443501.14 frames. ], batch size: 9, lr: 1.00e-042022025-04-24 11:39:08,6272025-04-24 11:39:08,628 INFO [train.py:611] Epoch 1, batch 8150, loss[loss=0.002121, acc=0.4603, codec_acc=0.3631, codec_topk_acc=0.8324, codec_loss=0.001034, text_loss=0.001087, over 2309.00 frames. ], tot_loss[loss=0.00193, acc=0.5513, codec_acc=0.3705, codec_topk_acc=0.8453, codec_loss=0.001056, text_loss=0.0008741, over 442622.18 frames. ], batc2022025-04-24 11:39:34,112 INFO [train.py:611] Epoch 1, batch 8200, loss[loss=0.001939, acc=0.5354, codec_acc=0.3698, codec_topk_acc=0.8401, codec_loss=0.001027, text_loss=0.0009122, over 2300.00 frames. ], tot_loss[loss=0.001915, acc=0.5525, codec_acc=0.373, codec_topk_acc=0.8481, codec_loss=0.001046, text_loss=0.0008686, over 443598.06 frames. ], batch size: 9, lr: 1.00e-042022025-04-24 11:40:03,498 INFO [train.py:611] Epoch 1, batch 8250, loss[loss=0.0019, acc=0.6853, codec_acc=0.3371, codec_topk_acc=0.821, codec_loss=0.001224, text_loss=0.0006761, over 2031.00 frames. ], tot_loss[loss=0.00192, acc=0.5506, codec_acc=0.3729, codec_topk_acc=0.8485, codec_loss=0.001046, text_loss=0.0008739, over 443298.19 frames. ], batch size: 5, lr: 1.00e-04, 22025-04-24 11:40:29,712 INFO [train.py:611] Epoch 1, batch 8300, loss[loss=0.001866, acc=0.5036, codec_acc=0.383, codec_topk_acc=0.8579, codec_loss=0.0009411, text_loss=0.0009248, over 2403.00 frames. ], tot_loss[loss=0.00191, acc=0.5524, codec_acc=0.3732, codec_topk_acc=0.2025-04-24 11:40:29,7122025-04-24 11:40:29,712 INFO [train.py:611] Epoch 1, batch 8300, loss[loss=0.00192025-04-24 11:40:54,263 INFO [train.py:611] Epoch 1, batch 8350, loss[loss=0.00201, acc=0.4758, codec_acc=0.3515, codec_topk_acc=0.8408, codec_loss=0.001028, text_loss=0.000982, over 2318.00 frames. ], tot_loss[loss=0.001909, acc=0.5513, codec_acc=0.3722, codec_topk_acc=0.8481, codec_loss=0.001043, text_loss=0.000866, over 445384.92 frames. ], batch size: 9, lr: 1.00e-04, 2025-2025-04-24 11:41:19,939 INFO [train.py:611] Epoch 1, batch 8400, loss[loss=0.001861, acc=0.6264, codec_acc=0.3543, codec_topk_acc=0.849, codec_loss=0.001096, text_loss=0.000765, over 2122.00 frames. ], tot_loss[loss=0.001916, acc=0.5539, codec_acc=0.3725, codec_topk_acc2025-04-24 11:41:19,940 IN2025-04-24 11:41:19,940 INFO [train.py:611] Epoch 1, batch 8400, loss[loss=20252025-04-24 11:41:45,970 INFO [train.py:611] Epoch 1, batch 8450, loss[loss=0.001906, acc=0.5051, codec_acc=0.3892, codec_topk_acc=0.8613, codec_loss=0.000965, text_loss=0.0009412, over 2312.00 frames. ], tot_loss[loss=0.001911, acc=0.5539, codec_acc=0.3732, codec_topk_acc=02025-04-24 11:41:45,970 I2025-04-24 11:41:45,970 INFO [train.py:611] Epoch 1, batch 8450, loss[loss=0.202025-04-24 11:42:24,565 INFO [train.py:611] Epoch 1, batch 8500, loss[loss=0.001984, acc=0.5439, codec_acc=0.3794, codec_topk_acc=0.869, codec_loss=0.001097, text_loss=0.0008864, over 2008.00 frames. ], tot_loss[loss=0.001904, acc=0.5542, codec_acc=0.373, codec_topk_acc=0.8482, codec_loss=0.001045, text_loss=0.0008586, over 443864.59 frames. ], batch size: 3, lr: 1.00e-04, 2025-04-24 11:42:53,778 INFO [train.py:611] Epoch 1, batch 8550, loss[loss=0.002555, acc=0.5, codec_acc=0.3965, codec_topk_acc=0.8636, codec_loss=0.001325, text_loss=0.00123, over 1656.00 frames. ], tot_loss[loss=0.001908, acc=0.5519, codec_acc=0.3725, codec_topk_acc=0.8494, codec_loss=0.001044, text_loss=0.0008638, over 443523.16 frames. ], batch size: 2, lr: 1.00e-04, 20252025-04-24 11:43:23,302025-04-24 11:43:23,302025-04-24 11:43:23,307 INFO [train.py:611] Epoch 1, batch 8600, loss[loss=0.00196, acc=0.4763, codec_acc=0.3817, codec_topk_acc=0.8622, codec_loss=0.0009825, text_loss=0.0009776, over 2296.00 frames. ], tot_loss[loss=0.001908, ac2025-04-24 11:43:23,308 INFO [train.py:611] Epoch 1, batch 8600, loss[loss=0.002116, acc=0.4333, cod202025-04-24 11:43:52,472 INFO [train.py:611] Epoch 1, batch 8650, loss[loss=0.001843, acc=0.4991, codec_acc=0.3814, codec_topk_acc=0.863, codec_loss=0.0009398, text_loss=0.0009032, over 2382.00 frames. ], tot_loss[loss=0.001913, acc=0.5523, codec_acc=0.3735, codec_topk_acc=0.8485, codec_loss=0.001045, text_loss=0.0008674, over 443426.80 frames. ], batch size: 9, lr: 1.00e-04,2025-04-24 11:44:23,661 INFO2025-04-24 11:44:23,665 INFO [train.py:611] Epoch 1, batch 8700, loss[loss=0.001817, acc=0.5566, codec_acc=0.4191, codec_topk_acc=0.876, codec_loss=0.0009351, text_loss=0.0008822, over 2208.00 frames. ], tot_loss[loss=0.001888, acc=0.5558, codec_acc=0.3745, codec_topk_acc=0.8506, codec_loss=0.001035, text_loss=0.0008531, over 445816.85 frames. ],2025-04-24 11:44:48,879 INFO [t2025-04-24 11:44:48,879 INFO [train.py:611] Epoch 1, batch 8750, loss[loss=0.001733, acc=0.5699, codec_acc=0.3972, codec_topk_acc=0.862, codec_loss=0.0009555, text_loss=0.0007774, over 2313.00 frames. ], tot_loss[loss=0.001892, acc=0.5584, codec_acc=0.3753, codec_topk_acc=0.2025-04-24 11:44:48,879 INFO [train.py:611] Epoch 1, batch 8750, loss[lo2025-04-24 11:45:14,544 INFO [train.py:611] Epoch 1, batch 8800, loss[loss=0.001922, acc=0.5103, codec_acc=0.3725, codec_topk_acc=0.8582, codec_loss=0.0009994, text_loss=0.0009227, over 2266.00 frames. ], tot_loss[loss=0.001904, acc=0.5498, codec_acc=0.3745, codec_topk_acc=0.851, codec_loss=0.001038, text_loss=0.0008666, over 444341.89 frames. ], batch size: 7, lr: 1.00e-04, 2025-04-24 11:45:45,298 INFO [train.py:611] Epoch 1, batch 8850, loss[loss=0.001943, acc=0.4873, codec_acc=0.365, codec_topk_acc=0.8487, codec_loss=0.0009808, text_loss=0.0009617, over 2343.00 frames. ], tot_loss[loss=0.001905, acc=0.552, codec_acc=0.3749, codec_topk_acc=0.8513, codec_loss=0.00104, text_loss=0.0008648, over 442929.56 frames. ], batch size: 13, lr: 1.00e-04, 2025-04-24 11:46:11,560 INFO [train.py:611] Epoch 1, batch 8900, loss[loss=0.001672, acc=0.5503, codec_acc=0.3763, codec_topk_acc=0.8559, codec_loss=0.0009285, text_loss=0.0007432, over 2462.00 frames. ], tot_loss[loss=0.001911, acc=0.5485, codec_acc=0.3753, codec_topk_acc=0.852, codec_loss=0.001038, tex2025-04-24 11:46:11,561 INFO [train.py:611] Epoch 1, batch 8900, loss[loss2025-2025-04-24 11:46:36,654 INFO [train.py:611] Epoch 1, batch 8950, loss[loss=0.001913, acc=0.5296, codec_acc=0.3578, codec_topk_acc=0.8455, codec_loss=0.001001, text_loss=0.0009115, over 2337.00 frames. ], tot_loss[loss=0.001905, acc=0.5539, codec_acc=0.3752, codec_topk_acc=0.851, codec_loss=0.001041, text_loss=0.0008639, over 442707.91 frames. ], batch size: 13, lr: 1.00e-2025-04-24 11:47:00,753 INFO [train.py:611] Epoc2025-04-24 11:47:00,754 INFO [train.py:611] Epoch 1, batch 9000, loss[loss=0.001782, acc=0.5545, codec_acc=0.3869, codec_topk_acc=0.8558, codec_loss=0.000982, text_loss=0.0007998, over 2290.00 frames. ], tot_loss[loss=0.001894, acc=0.5547, codec_acc=0.3762, codec_topk_acc=0.8525, codec_loss=0.001034, text_loss=0.0008596, over 4432025-2025-04-24 11:47:25,769 INFO [train.py:611] Epoch 1, batch 9050, loss[loss=0.001913, acc=0.6341, codec_acc=0.38, codec_topk_acc=0.85, codec_loss=0.001167, text_loss=0.0007463, over 1954.00 frames. ], tot_loss[loss=0.001908, acc=0.5567, codec_acc=0.3765, codec_topk_acc=0.852, codec_loss=0.001044, te2025-04-24 11:47:25,769 INFO [train.py:611] Epoch 1, batch 9050, loss[loss22025-04-24 11:47:50,119 INFO [train.py:611] Epoch 1, batch 9100, loss[loss=0.001893, acc=0.5342, codec_acc=0.384, codec_topk_acc=0.8584, codec_loss=0.0009994, text_loss=0.0008931, over 2268.00 frames. ], tot_loss[loss=0.001899, acc=0.5611, codec_acc=0.3766, codec_topk_acc=0.8523, codec_loss=0.001044, text_loss=0.0008555, over 440063.09 frames. ], batch size: 7, lr: 1.00e-042022025-04-24 11:48:16,122025-04-24 11:48:16,121 INFO [train.py:611] Epoch 1, batch 9150, loss[loss=0.001987, acc=0.542, codec_acc=0.3872, codec_topk_acc=0.8636, codec_loss=0.001029, text_loss=0.000958, over 2162.00 frames. ], tot_loss[loss=0.001906, acc=0.5504, codec_acc=0.3778, codec_topk_acc=0.8546, codec_loss=0.001031, text_loss=0.0008745, over 443035.39 frames. ], batch s2025-04-24 11:48:44,614 INFO [train.py:611] Epoch 1, batch 9200, loss[loss=0.001556, acc=0.6401, codec_acc=0.3828, codec_topk_acc=0.8494, codec_loss=0.0009817, text_loss=0.0005748, over 2318.00 frames. ], tot_loss[loss=0.001884, acc=0.5591, codec_acc=0.3781, codec_topk_acc=2025-04-24 11:48:44,614 INFO [train.py:611] Epoch 1, batch 9200, loss[loss=0.001947, acc=0.5224, codec_ac202025-04-24 11:49:13,290 INFO [train.py:611] Epoch 1, batch 9250, loss[loss=0.001485, acc=0.6235, codec_acc=0.399, codec_topk_acc=0.8743, codec_loss=0.0008904, text_loss=0.0005947, over 2414.00 frames. ], tot_loss[loss=0.001896, acc=0.56, codec_acc=0.3779, codec_topk_acc=0.8543, codec_loss=0.001038, text_loss=0.000858, over 440090.19 frames. ], batch size: 7, lr: 1.00e-04, 2025-04-24 11:49:42,212025-04-24 11:49:42,214 I2025-04-24 11:49:42,214 INFO [train.py:611] Epoch 1, batch 9300, loss[loss=0.001991, acc=0.5714, codec_acc=0.3574, codec_topk_acc=0.8762, codec_loss=0.001068, text_loss=0.0009223, over 2097.00 frames. ], tot_loss[loss=0.00182025-04-24 11:49:42,214 INFO [tra2025-04-24 11:49:42,214 INFO [train.py:611] Epoch 1, batch 9300, loss[los2025-04-24 11:50:21,469 2025-04-24 11:50:21,471 INFO [train.py:611] Epoch 1, batch 9350, loss[loss=0.00182, acc=0.5291, codec_acc=0.3892, codec_topk_acc=0.8404, codec_loss=0.000937, text_loss=0.0008828, over 2491.00 frames. ], tot_loss[loss=0.001886, acc=0.5569, codec_acc=0.3798, codec_topk_acc=0.8567, codec_loss=0.001027, text_loss=0.0008583, over 441985.47 frames. ], batch s2025-04-24 11:50:51,403 INFO [train.py:611] Epoch 1, batch 9400, loss[loss=0.001921, acc=0.5063, codec_acc=0.3856, codec_topk_acc=0.863, codec_loss=0.0009666, text_loss=0.0009544, over 2321.00 frames. ], tot_loss[loss=0.001876, acc=0.5594, codec_acc=0.3793, codec_topk_acc=0.8563, codec_loss=0.001028, text_loss=0.0008484, over 442552.80 frames. ], batch size: 6, lr: 1.00e-04, 2025-04-24 11:51:16,5632025-04-24 11:51:16,563 INFO [train.py:611] Epoch 1, batch 9450, loss[loss=0.002167, acc=0.6414, codec_acc=0.351, codec_topk_acc=0.8449, codec_loss=0.001402, text_loss=0.0007654, over 1698.00 frames. ], tot_loss[loss=0.001873, acc=0.5603, codec2025-04-24 11:51:16,563 INFO [train.py:611] Epoch 1, batch 9450, loss[loss=0.001953, acc=0.4138, codec_acc=0.3882025-04-24 11:51:40,746 INFO [train.py:611] Epoch 1, batch 9500, loss[loss=0.00155, acc=0.6935, codec_acc=0.3757, codec_topk_acc=0.8646, codec_loss=0.0009584, text_loss=0.0005916, over 2321.00 frames. ], tot_loss[loss=0.001885, acc=0.5532, codec_acc=0.381, codec_topk_acc=0.8579, codec_loss=0.001023, text_loss=0.0008624, over 442193.47 frames. ], batch size: 5, lr: 1.00e-04, 22025-04-24 11:52:05,828 INFO [train.py:611] Epoch 1, batch 9550, loss[loss=0.001641, acc=0.6633, codec_acc=0.3986, codec_topk_acc=0.8596, codec_loss=0.001023, text_loss=0.0006187, over 2125.00 frames. ], tot_loss[loss=0.001888, acc=0.5537, codec_acc=0.3811, codec_topk_acc=0.8576, codec_loss=0.001023, text_loss=0.0008658, over 442419.15 frames. ], batch size: 4, lr: 1.00e-04, 22025-04-24 11:52:31,048 INFO [train.py:611] Epoch 1, batch 9600, loss[loss=0.002618, acc=0.5, codec_acc=0.3808, codec_topk_acc=0.8621, codec_loss=0.001299, text_loss=0.001319, over 1722.00 frames. ], tot_loss[loss=0.001886, acc=0.5515, codec_acc=0.3821, codec_topk_a2025-04-24 11:52:31,048 INFO [train.py:611] Epoch 1, batch 9600, loss[loss=0.002365, acc=0.5015, codec_acc=0.322025-04-24 11:53:01,6252025-04-24 11:53:01,625 INFO [train.py:611] Epoch 1, batch 9650, loss[loss=0.001832, acc=0.4846, codec_acc=0.3901, codec_topk_acc=0.8715, codec_loss=0.000941, text_loss=0.0008912, over 2338.00 frames. ], tot_loss[loss=0.001865, acc=0.5563, codec_acc=0.3827, codec_topk_acc=02025-04-24 11:53:01,626 INFO [train.py:611] Epoch 1, batch 9650, loss[loss=0.00172025-04-24 11:53:27,081 INFO [train.py:611] Epoch 1, batch 9700, loss[loss=0.001829, acc=0.5361, codec_acc=0.3928, codec_topk_acc=0.8714, codec_loss=0.0009457, text_loss=0.0008831, over 2311.00 frames. ], tot_loss[loss=0.001879, acc=0.5545, codec_acc=0.3834, codec_topk_acc=0.8594, codec_loss=0.001016, text_loss=0.0008633, over 443270.31 frames. ], batch size: 12, lr: 1.00e-04, 2025-04-24 11:53:52,120 I2025-04-24 11:53:52,121 INFO [train.py:611] Epoch 1, batch 9750, loss[loss=0.001807, acc=0.521, codec_acc=0.378, codec_topk_acc=0.8603, codec_loss=0.0009222, text_loss=0.0008847, over 2447.00 frames. ], tot_loss[loss=0.001864, acc=0.5555, codec_acc=0.3849, codec_topk_acc=0.8616, codec_loss=0.001007, text_loss=0.0008573, over 444889.73 frames. ], batc2025-04-24 11:54:16,951 INF2025-04-24 11:54:16,951 INFO [train.py:611] Epoch 1, batch 9800, loss[loss=0.001869, acc=0.596, codec_acc=0.3911, codec_topk_acc=0.8586, codec_loss=0.001047, text_loss=0.0008221, over 2181.00 frames. ], tot_loss[loss=0.001871, acc=0.5559, codec_acc=0.3849, codec_topk_acc2025-04-24 11:54:16,951 INFO [train.py:611] Epoch 1, batch 9800, loss[loss=0.0020252025-04-24 11:54:47,0802025-04-24 11:54:47,082 INFO [train.py:611] Epoch 1, batch 9850, loss[loss=0.001859, acc=0.5217, codec_acc=0.375, codec_topk_acc=0.8719, codec_loss=0.0009773, text_loss=0.0008815, over 2279.00 frames. ], tot_loss[loss=0.001865, acc=0.5535, codec_acc=0.3855, codec_topk_acc=0.863, codec_loss=0.001007, text_loss=0.0008585, over 443835.96 frames. ], bat2025-04-24 11:55:13,453 INF2025-04-24 11:55:132025-04-24 11:55:13,453 INFO [train.py:611] Epoch 1, batch 9900, loss[loss=0.001811, acc=0.5559, codec_acc=0.378, codec_topk_acc=0.8676, codec_loss=0.001006, text_loss=0.0008049, over 2213.00 frames. ], tot_loss[loss=0.001864, acc=0.5568, codec_acc=0.3866, codec_topk_acc=0.8632, codec_loss=0.001005, text_loss=0.0008595, over 4437632025-04-24 11:55:43,849 INFO [train.py:611] Epoch 1, batch 9950, loss[loss=0.001699, acc=0.557, codec_acc=0.3813, codec_topk_acc=0.8663, codec_loss=0.0009491, text_loss=0.0007499, over 2340.00 frames. ], tot_loss[loss=0.001883, acc=0.5492, codec_acc=0.3847, codec_topk_acc=0.8629, codec_loss=0.001009, text_loss=0.0008747, over 443002.37 frames. ], batch size: 6, lr: 1.00e-04, 2025-04-24 11:56:07,777 INF2025-04-24 11:56:072025-04-24 11:56:07,779 202025-04-24 11:56:16,037 I2025-04-24 11:56:16,037 INFO [train.py:548] Epoch 1, validation: loss=0.001937, acc=0.5731, codec_acc=0.3887, codec_topk_acc=0.8662, codec_loss=0.001056, text_loss=0.0008808,2025-04-24 11:56:16,037 I2025-04-24 11:56:16,038 INFO [train.py:549] Maximum memory a2025-04-24 11:56:24,841 INFO [train.py:611] Epoch 1, batch 10000, loss[loss=0.002193, acc=0.5797, codec_acc=0.4115, codec_topk_acc=0.8881, codec_loss=0.001234, text_loss=0.0009595, over 1676.00 frames. ], tot_loss[loss=0.001874, acc=0.5496, codec_acc=0.3858, codec_topk_acc=0.8637, codec_loss=0.001004, text_loss=0.0008702, over 443828.68 frames. ], batch size: 2, lr: 1.00e-04, 2025-04-24 11:56:54,729 INFO [train.py:611] Epoch 1, batch 10050, loss[loss=0.001778, acc=0.5412, codec_acc=0.3848, codec_topk_acc=0.8632, codec_loss=0.0009646, text_loss=0.0008138, over 2321.00 frames. ], tot_loss[loss=0.001873, acc=0.5507, codec_acc=0.3859, codec_topk_acc=0.864, codec_loss=0.001006, text_loss=0.0008673, over 442682.33 frames. ], batch size: 6, lr: 1.00e-04,2025-04-24 11:57:29,885 INFO [train.py:611] Epoch 1, batch 10100, loss[loss=0.002037, acc=0.451, codec_acc=0.4, codec_topk_acc=0.8703, codec_loss=0.0009316, text_loss=0.001106, over 2335.00 frames. ], tot_loss[loss=0.00185, acc=0.5543, codec_acc=0.3895, codec_topk_acc=0.8671, codec_loss=0.0009923, text_loss=0.0008575, over 444860.14 frames. ], batch size: 12, lr: 1.00e-04, 2025-2025-04-24 11:57:55,052 INF2025-04-24 11:57:55,053 INFO [train.py:611] Epoch 1, batch 10150, loss[loss=0.002015, acc=0.4973, codec_acc=0.4049, codec_topk_acc=0.8766, codec_loss=0.0009634, text_loss=0.001051, over 2235.00 frames. ], tot_loss[loss=0.001863, acc=0.5545, codec_acc=0.3892, codec_topk_acc=0.8669, codec_loss=0.0009978, text_loss=0.0008656, over 442383.61 frames. ],2025-04-24 11:58:24,631 INFO [train.py:611] Epoch 1, batch 10200, loss[loss=0.002121, acc=0.5105, codec_acc=0.3687, codec_topk_acc=0.8564, codec_loss=0.001069, text_loss=0.001052, over 2135.00 frames. ], tot_loss[loss=0.001855, acc=0.5543, codec_acc=0.3903, codec_topk_acc=0.8681, codec_loss=0.0009909, text_loss=0.0008644, over 444514.08 frames. ], batch size: 6, lr: 1.00e-04, 2025-04-24 11:58:49,417 INFO [train.py:611] Epoch 1, batch 10250, loss[loss=0.001606, acc=0.5819, codec_acc=0.396, codec_topk_acc=0.8726, codec_loss=0.0008933, text_loss=0.0007129, over 2423.00 frames. ], tot_loss[loss=0.001858, acc=0.5539, codec_acc=0.3908, codec_topk_acc=0.8677, codec_loss=0.0009905, text_loss=0.0008673, over 444752.98 frames. ], batch size: 7, lr: 1.00e-04, 2025-04-24 11:59:15,232 INFO [train.py:611] Epoch 1, batch 10300, loss[loss=0.001864, acc=0.6009, codec_acc=0.3631, codec_topk_acc=0.8467, codec_loss=0.001091, text_loss=0.0007736, over 2158.00 frames. ], tot_loss[loss=0.001852, acc=0.5554, codec_acc=0.3903, codec_topk_acc=0.8679, codec_loss=0.0009911, text_loss=0.0008605, over 444461.70 frames. ], batch size: 7, lr: 1.00e-04, 2022025-04-24 11:59:41,583 INFO [train.py:611] Epoch 1, batch 10350, loss[loss=0.001859, acc=0.5054, codec_acc=0.3947, codec_topk_acc=0.8762, codec_loss=0.0009539, text_loss=0.0009055, over 2280.00 frames. ], tot_loss[loss=0.001861, acc=0.5517, codec_acc=0.3901, codec_topk_acc=0.8678, codec_loss=0.0009915, text_loss=0.000869, over 444083.43 frames. ], batch size: 8, lr: 1.00e-02025-04-24 12:00:05,722 INFO [train.py:611] Epoch 1, batch 10400, loss[loss=0.001924, acc=0.5915, codec_acc=0.4141, codec_topk_acc=0.8941, codec_loss=0.001115, text_loss=0.0008087, over 1852.00 frames. ], tot_loss[loss=0.001847, acc=0.5573, codec_acc=0.391, codec_topk_acc=0.8687, codec_loss=0.0009899, text_loss=0.0008567, over 443951.58 frames. ], batch size: 2, lr: 1.00e-04, 2022025-04-24 12:00:31,747 INFO [train.py:611] Epoch 1, batch 10450, loss[loss=0.002051, acc=0.5717, codec_acc=0.3838, codec_topk_acc=0.866, codec_loss=0.001148, text_loss=0.0009036, over 1943.00 frames. ], tot_loss[loss=0.001865, acc=0.5509, codec_acc=0.3913, codec_topk_acc=0.8694, codec_loss=0.0009916, text_loss=0.000873, over 442233.70 frames. ], batch size: 16, lr: 1.00e2025-04-24 12:01:07,233 INF2025-04-24 12:01:07,234 INFO [train.py:611] Epoch 1, batch 10500, loss[loss=0.001875, acc=0.5049, codec_acc=0.3956, codec_topk_acc=0.8648, codec_loss=0.0009318, text_loss=0.0009431, over 2377.00 frames. ], tot_loss[loss=0.001864, acc=0.5529, codec_acc=0.3929, codec_topk_acc=0.8702, codec_loss=0.0009926, text_loss=0.000871, over 440726.93 frames. ], bat2025-04-24 12:01:31,466 INFO [train.py:611] Epoch 1, batch 10550, loss[loss=0.002126, acc=0.5086, codec_acc=0.3914, codec_topk_acc=0.8773, codec_loss=0.001096, text_loss=0.00103, over 1966.00 frames. ], tot_loss[loss=0.001846, acc=0.5576, codec_acc=0.3928, codec_topk_acc=0.8704, codec_loss=0.0009899, text_loss=0.0008565, over 441628.06 frames. ], batch size: 4, lr: 1.00e-04, 2025-04-24 12:01:56,392 INFO [train.py:611] Epoch 1, batch 10600, loss[loss=0.001854, acc=0.4889, codec_acc=0.3882, codec_topk_acc=0.8691, codec_loss=0.0009138, text_loss=0.0009404, over 2405.00 frames. ], tot_loss[loss=0.001845, acc=0.5592, codec_acc=0.3927, codec_topk_acc=0.8707, codec_loss=0.0009908, text_loss=0.0008539, over 441220.79 frames. ], batch size: 7, lr: 1.00e-04, 2025-04-22025-04-24 12:02:20,116 INFO [train.py:611] Epoch 1, batch 10650, loss[loss=0.001846, acc=0.5133, codec_acc=0.3848, codec_topk_acc=0.8473, codec_loss=0.0009549, text_loss=0.0008912, over 2377.00 frames. ], tot_loss[loss=0.001836, acc=0.5627, codec_acc=0.3924, codec_topk_acc=0.8702, codec_loss=0.0009885, text_loss=0.0008478, over 442781.81 frames. ], batch size: 8, lr: 1.2025-04-24 12:03:00,173 INFO [train.py:611] Epoch 1, batch 10700, loss[loss=0.001719, acc=0.5569, codec_acc=0.4012, codec_topk_acc=0.8743, codec_loss=0.0009457, text_loss=0.0007731, over 2306.00 frames. ], tot_loss[loss=0.001851, acc=0.5533, codec_acc=0.3937, codec_topk_acc=0.8721, codec_loss=0.0009845, text_loss=0.0008668, over 442445.97 frames. ], batch size: 6, lr: 1.00e-04, 2025-04-24 12:03:34,189 INFO [train.py:611] Epoch 1, batch 10750, loss[loss=0.001714, acc=0.6778, codec_acc=0.4024, codec_topk_acc=0.873, codec_loss=0.00107, text_loss=0.0006442, over 2006.00 frames. ], tot_loss[loss=0.001841, acc=0.5543, codec_acc=0.3952, codec_topk_acc=0.873, codec_loss=0.0009784, text_loss=0.0008629, over 443871.79 frames. ], batch size: 3, lr: 1.00e-04, 2025-042025-04-24 12:03:58,761 INFO [train.py:611] Epoch 1, batch 10800, loss[loss=0.001641, acc=0.6127, codec_acc=0.4339, codec_topk_acc=0.8732, codec_loss=0.0009405, text_loss=0.0007005, over 2214.00 frames. ], tot_loss[loss=0.001833, acc=0.5552, codec_acc=0.3939, codec_topk_acc=0.872, codec_loss=0.0009774, text_loss=0.0008557, over 445483.63 frames. ], batch size: 3, lr: 1.00e2025-04-24 12:04:23,429 INFO [train.py:611] Epoch 1, batch 10850, loss[loss=0.001851, acc=0.5163, codec_acc=0.3865, codec_topk_acc=0.8604, codec_loss=0.0009257, text_loss=0.0009252, over 2440.00 frames. ], tot_loss[loss=0.001844, acc=0.5541, codec_acc=0.396, codec_topk_acc=0.8741, codec_loss=0.0009783, text_loss=0.0008661, over 442672.19 frames. ], batch size: 11, lr: 1.00e-04, 2025-04-24 12:04:49,382 INFO [train.py:611] Epoch 1, batch 10900, loss[loss=0.001778, acc=0.4926, codec_acc=0.3989, codec_topk_acc=0.8786, codec_loss=0.0008896, text_loss=0.0008888, over 2410.00 frames. ], tot_loss[loss=0.001838, acc=0.5523, codec_acc=0.397, codec_topk_acc=0.8753, codec_loss=0.0009717, text_loss=0.0008665, over 444280.60 frames. ], batch size: 7, lr: 1.00e-04, 202025-04-24 12:05:14,0622025-04-24 12:05:14,062 INFO [train.py:611] Epoch 1, batch 10950, loss[loss=0.001764, acc=0.5957, codec_acc=0.3864, codec_topk_acc=0.8695, codec_loss=0.000993, text_loss=0.000771, over 2223.00 frames. ], tot_loss[loss=0.001823, acc=0.5602, codec_acc=0.3965, codec_topk_acc=0.8749, codec_loss=0.0009754, text_loss=0.0008473, over 442564.46 frames. ], batch s22025-04-24 12:05:39,617 INFO [train.py:611] Epoch 1, batch 11000, loss[loss=0.001757, acc=0.5247, codec_acc=0.4149, codec_topk_acc=0.8876, codec_loss=0.0008789, text_loss=0.0008785, over 2364.00 frames. ], tot_loss[loss=0.001841, acc=0.5527, codec_acc=0.397, codec_topk_acc=0.8745, codec_loss=0.0009726, text_loss=0.0008689, over 444186.90 frames. ], batch size: 9, lr: 1.00e-042022025-04-24 12:06:10,432025-04-24 12:06:10,431 INFO [train.py:611] Epoch 1, batch 11050, loss[loss=0.001751, acc=0.5508, codec_acc=0.4084, codec_topk_acc=0.8912, codec_loss=0.0008772, text_loss=0.0008742, over 2322.00 frames. ], tot_loss[loss=0.001819, acc=0.5589, codec_acc=0.3979, codec_topk_acc=0.8762, codec_loss=0.0009699, text_loss=0.0008492, over 443537.71 frames. ], batch2025-04-24 12:06:34,343 INFO [train.py:611] Epoch 1, batch 11100, loss[loss=0.001611, acc=0.5714, codec_acc=0.3946, codec_topk_acc=0.8965, codec_loss=0.0009175, text_loss=0.0006931, over 2321.00 frames. ], tot_loss[loss=0.001829, acc=0.5549, codec_acc=0.3976, codec_topk_acc=0.8764, codec_loss=0.0009689, text_loss=0.0008603, over 444264.76 frames. ], batch size: 5, lr: 1.00e-04, 2025-04-24 12:06:59,976 INFO [train.py:611] Epoch 1, batch 11150, loss[loss=0.001449, acc=0.6828, codec_acc=0.396, codec_topk_acc=0.8623, codec_loss=0.0008991, text_loss=0.0005503, over 2434.00 frames. ], tot_loss[loss=0.001822, acc=0.5571, codec_acc=0.3989, codec_topk_acc=0.8771, codec_loss=0.0009677, text_loss=0.0008548, over 443693.11 frames. ], batch size: 4, lr: 1.00e-04, 2025-04-24 12:07:25,472 INFO [train.py:611] Epoch 1, batch 11200, loss[loss=0.00167, acc=0.5638, codec_acc=0.3789, codec_topk_acc=0.8723, codec_loss=0.0009547, text_loss=0.0007153, over 2289.00 frames. ], tot_loss[loss=0.001828, acc=0.554, codec_acc=0.3998, codec_topk_acc=0.8781, codec_loss=0.000965, text_loss=0.0008627, over 443385.14 frames. ], batch size: 3, lr: 1.00e-04, 202025-04-24 12:07:55,635 INFO [train.py:611] Epoch 1, batch 11250, loss[loss=0.001809, acc=0.4592, codec_acc=0.4169, codec_topk_acc=0.8917, codec_loss=0.000852, text_loss=0.0009574, over 2401.00 frames. ], tot_loss[loss=0.00181, acc=0.5589, codec_acc=0.4005, codec_topk_acc=0.8786, codec_loss=0.0009599, text_loss=0.0008496, over 445123.10 frames. ], batch size: 7, lr: 1.00e-04, 2025-04-24 12:08:21,504 INFO [train.py:611] Epoch 1, batch 11300, loss[loss=0.002332, acc=0.4365, codec_acc=0.3921, codec_topk_acc=0.8629, codec_loss=0.0009827, text_loss=0.00135, over 2239.00 frames. ], tot_loss[loss=0.001824, acc=0.5549, codec_acc=0.401, codec_topk_acc=0.8788, codec_loss=0.0009624, text_loss=0.000862, over 443473.46 frames. ], batch size: 3, lr: 1.00e-04, 2202025-04-24 12:08:56,342025-04-24 12:08:56,342 INFO [train.py:611] Epoch 1, batch 11350, loss[loss=0.001977, acc=0.4716, codec_acc=0.4103, codec_topk_acc=0.887, codec_loss=0.00092, text_loss=0.001057, over 2269.00 frames. ], tot_loss[loss=0.001812, acc=0.5573, codec_acc=0.4023, codec_topk_acc=0.8795, codec_loss=0.0009588, text_loss=0.0008532, over 443518.05 frames. ], batch size2025-042025-04-24 12:09:26,626 INFO [train.py:611] Epoch 1, batch 11400, loss[loss=0.002631, acc=0.4874, codec_acc=0.3806, codec_topk_acc=0.86, codec_loss=0.001223, text_loss=0.001408, over 1890.00 frames. ], tot_loss[loss=0.001823, acc=0.5508, codec_acc=0.4025, codec_topk_acc=0.8806, codec_loss=0.0009549, text_loss=0.0008679, over 444586.84 frames. ], batch size: 2, lr: 1.00e2025-04-24 12:10:01,385 INFO [train.py:611] Epoch 1, batch 11450, loss[loss=0.001752, acc=0.5031, codec_acc=0.4255, codec_topk_acc=0.9005, codec_loss=0.0008613, text_loss=0.0008903, over 2294.00 frames. ], tot_loss[loss=0.001807, acc=0.555, codec_acc=0.4037, codec_topk_acc=0.8805, codec_loss=0.0009505, text_loss=0.0008565, over 446281.77 frames. ], batch size: 11, lr: 1.00e-04, 22025-04-24 12:10:26,2025-04-24 12:10:26,789 INFO [train.py:611] Epoch 1, batch 11500, loss[loss=0.001641, acc=0.7375, codec_acc=0.394, codec_topk_acc=0.8673, codec_loss=0.001076, text_loss=0.0005653, over 1986.00 frames. ], tot_loss[loss=0.001808, acc=0.5563, codec_acc=0.4033, codec_topk_acc=0.8808, codec_loss=0.0009534, text_loss=0.0008551, over 444525.06 frames. ], batch si2025-04-24 12:10:56,467 INFO [train.py:611] Epoch 1, ba2025-04-24 12:10:56,467 INFO [train.py:611] Epoch 1, batch 11550, loss[loss=0.001878, acc=0.4878, codec_acc=0.4162, codec_topk_acc=0.8872, codec_loss=0.0008956, text_loss=0.0009824, over 2312.00 frames. ], tot_loss[loss=0.00181, acc=0.554, codec_acc=0.4033, codec_topk_acc=0.882, codec_loss=0.0009517, text_loss=0.0008583, 2022025-04-24 12:11:22,572 INFO [train.py:611] Epoch 1, b2025-04-24 12:11:22,572 INFO [train.py:611] Epoch 1, batch 11600, loss[loss=0.002134, acc=0.5698, codec_acc=0.399, codec_topk_acc=0.8821, codec_loss=0.001121, text_loss=0.001013, over 1890.00 frames. ], tot_loss[loss=0.001805, acc=0.5548, codec_acc=0.4032, codec_topk_acc=0.8826, codec_loss=0.0009503, text_loss=0.0008548, o222025-04-24 12:11:48,490 INFO [train.py:611] Epoch 1, b2025-04-24 12:11:48,491 INFO [train.py:611] Epoch 1, batch 11650, loss[loss=0.001965, acc=0.4567, codec_acc=0.407, codec_topk_acc=0.88, codec_loss=0.0008924, text_loss=0.001073, over 2350.00 frames. ], tot_loss[loss=0.001797, acc=0.5559, codec_acc=0.4037, codec_topk_acc=0.8829, codec_loss=0.0009481, text_loss=0.0008491, ov202025-04-24 12:12:15,626 INFO [train.py:611] Epoch 1, 2025-04-24 12:12:15,626 INFO [train.py:611] Epoch 1, batch 11700, loss[loss=0.001789, acc=0.5592, codec_acc=0.4032, codec_topk_acc=0.881, codec_loss=0.0009527, text_loss=0.0008364, over 2231.00 frames. ], tot_loss[loss=0.001808, acc=0.5547, codec_acc=0.4042, codec_topk_acc=0.8833, codec_loss=0.0009501, text_loss=0.000858, ove222025-04-24 12:12:45,920 INFO [train.py:611] Epoch 1, batch 11750, loss[loss=0.001948, acc=0.4964, codec_acc=0.4105, codec_topk_acc=0.8913, codec_loss=0.0009035, text_loss=0.001045, over 2293.00 frames. ], tot_loss[loss=0.001825, acc=0.551, codec_acc=0.4046, codec_topk_acc=0.8828, codec_loss=0.0009525, text_loss=0.0008723, over 443113.05 frames. ], batch size: 8, lr: 1.00e-04, 22025-04-24 12:13:10,4222025-04-24 12:13:10,422 INFO2025-04-24 12:13:10,423 INFO [train.py:611] Epoch 1, batch 11800, loss[loss=0.001898, acc=0.4667, codec_acc=0.4118, codec_topk_acc=0.8938, codec_loss=0.0009185, text_loss=0.0009798, over 2222.00 frames. ], tot_loss[loss=0.001815, acc=0.5557, codec_acc=0.4033, codec_topk_acc=0.8825, codec_loss=0.0009549, text_loss=0.0008596, ove2025-04-24 12:13:35,137 INFO [train.py:611] Epoch 1, batch 11850, loss[loss=0.001768, acc=0.565, codec_acc=0.4017, codec_topk_acc=0.8878, codec_loss=0.0009436, text_loss=0.0008245, over 2203.00 frames. ], tot_loss[loss=0.001803, acc=0.5562, codec_acc=0.4036, codec_topk_acc=0.882, codec_loss=0.000952, text_loss=0.0008506, over 444150.95 frames. ], batch size: 9, lr: 1.00e-04, 2022025-04-24 12:14:05,607 INFO [train.py:611] Epoch 1, batch 11900, loss[loss=0.002276, acc=0.5117, codec_acc=0.4111, codec_topk_acc=0.8814, codec_loss=0.001034, text_loss=0.001242, over 2019.00 frames. ], tot_loss[loss=0.001835, acc=0.5475, codec_acc=0.406, codec_topk_acc=0.884, codec_loss=0.0009512, text_loss=0.0008836, over 441922.40 frames. ], batch size: 16, lr: 1.00e-04, 2025-04-24 12:14:31,038 INFO [train.py:611] Epoch 1, batch 11950, loss[loss=0.001795, acc=0.519, codec_acc=0.4096, codec_topk_acc=0.8843, codec_loss=0.0008835, text_loss=0.0009119, over 2338.00 frames. ], tot_loss[loss=0.001825, acc=0.5511, codec_acc=0.4067, codec_topk_acc=0.8846, codec_loss=0.00095, text_loss=0.0008752, over 441707.33 frames. ], batch size: 13, lr: 1.00e-04, 2025-04-24 12:15:01,823 INFO [train.py:611] Epoch 1,2025-04-24 12:15:01,823 INFO [train.py:611] Epoch 1, batch 12000, loss[loss=0.001992, acc=0.4851, codec_acc=0.4094, codec_topk_acc=0.8875, codec_loss=0.0009192, text_loss=0.001072, over 2266.00 frames. ], tot_loss[loss=0.00181, acc=0.5543, codec_acc=0.406, codec_topk_acc=0.8843, codec_loss=0.0009474, text_loss=0.0008625, over 4202025-04-24 12:15:31,327 INFO [train.py:611] Epoch 1, batch 12050, loss[loss=0.001867, acc=0.5161, codec_acc=0.4097, codec_topk_acc=0.8858, codec_loss=0.0009443, text_loss=0.0009229, over 2219.00 frames. ], tot_loss[loss=0.001827, acc=0.5508, codec_acc=0.4067, codec_topk_acc=0.8851, codec_loss=0.0009508, text_loss=0.0008758, over 440905.75 frames. ], batch size: 10, lr: 1.00e-020252025-04-24 12:16:01,717 INFO [train.py:611] Epoch 1, batch 12100, loss[loss=0.001603, acc=0.6058, codec_acc=0.3968, codec_topk_acc=0.887, codec_loss=0.0008835, text_loss=0.0007196, over 2396.00 frames. ], tot_loss[loss=0.001828, acc=0.5506, codec_acc=0.4065, codec_topk_acc=0.8848, codec_loss=0.0009519, text_loss=0.000876, over 440576.48 frames. ], batch size: 8, lr: 1.00e-04,202025-04-24 12:16:2025-04-24 12:16:37,261 INFO [t2025-04-24 12:16:37,261 INFO [train.py:611] Epoch 1, batch 12150, loss[loss=0.002264, acc=0.5467, codec_acc=0.4099, codec_topk_acc=0.8983, codec_loss=0.001132, text_loss=0.001132, over 1803.00 frames. ], tot_loss[loss=0.001806, acc=0.5597, codec_acc=0.4055, codec_topk_acc=0.884, codec_loss=0.0009505, text_loss=0.0008552, over 42025-04-24 12:17:02,993 INFO [train.py:611] Epoch 1, batch 12200, loss[loss=0.001804, acc=0.519, codec_acc=0.4139, codec_topk_acc=0.8932, codec_loss=0.0008887, text_loss=0.0009155, over 2297.00 frames. ], tot_loss[loss=0.001819, acc=0.5525, codec_acc=0.4065, codec_topk_acc=0.8851, codec_loss=0.0009513, text_loss=0.0008674, over 440372.57 frames. ], batch size: 11, lr: 1.00e-04, 20252025-04-24 12:17:32,219 INFO [train.py:611] Epoch 1, batch 12250, loss[loss=0.001691, acc=0.6829, codec_acc=0.3814, codec_topk_acc=0.8576, codec_loss=0.001042, text_loss=0.0006488, over 2163.00 frames. ], tot_loss[loss=0.001819, acc=0.5527, codec_acc=0.4062, codec_topk_acc=0.885, codec_loss=0.000949, text_loss=0.0008704, over 441718.63 frames. ], batch size: 5, lr: 1.00e-04, 2025-04-24 12:17:57,474 INFO [train.py:611] Epoch 1, batch 12300, loss[loss=0.001548, acc=0.6729, codec_acc=0.4032, codec_topk_acc=0.8771, codec_loss=0.0009755, text_loss=0.0005724, over 2179.00 frames. ], tot_loss[loss=0.001804, acc=0.5568, codec_acc=0.4076, codec_topk_acc=0.8853, codec_loss=0.000947, text_loss=0.0008566, over 441813.85 frames. ], batch size: 7, lr: 1.00e-04, 22025-04-24 12:18:26,996 INFO [train.py:611] Epoch 1, batch 12350, loss[loss=0.001933, acc=0.5616, codec_acc=0.3975, codec_topk_acc=0.8834, codec_loss=0.00101, text_loss=0.000923, over 2098.00 frames. ], tot_loss[loss=0.001808, acc=0.5561, codec_acc=0.4065, codec_topk_acc=0.8849, codec_loss=0.0009489, text_loss=0.0008595, over 441640.84 frames. ], batch size: 4, lr: 1.00e-020252025-04-24 12:18:53,051 INFO [train.py:611] Epoch 1, batch 12400, loss[loss=0.001895, acc=0.5096, codec_acc=0.4034, codec_topk_acc=0.8905, codec_loss=0.0009331, text_loss=0.0009623, over 2237.00 frames. ], tot_loss[loss=0.0018, acc=0.5576, codec_acc=0.4071, codec_topk_acc=0.8851, codec_loss=0.0009468, text_loss=0.0008531, over 442295.42 frames. ], batch size: 10, lr: 1.00e2025202025-04-24 12:19:18,92025-04-24 12:19:18,906 INFO [train.py:611] Epoch 1, batch 12450, loss[loss=0.001696, acc=0.5191, codec_acc=0.4189, codec_topk_acc=0.8963, codec_loss=0.0008367, text_loss=0.0008596, over 2405.00 frames. ], tot_loss[loss=0.001809, acc=0.5583, codec_acc=0.4084, codec_topk_acc=0.886, codec_loss=0.0009486, text_loss=0.0008607, over 439896.99 frames. ], batch s2025-04-24 12:19:44,155 INFO [train.py:611] Epoch 1, batch 12500, loss[loss=0.002195, acc=0.4535, codec_acc=0.404, codec_topk_acc=0.8761, codec_loss=0.0009899, text_loss=0.001205, over 2173.00 frames. ], tot_loss[loss=0.001815, acc=0.5515, codec_acc=0.4079, codec_topk_acc=0.8867, codec_loss=0.0009423, text_loss=0.0008727, over 442641.85 frames. ], batch size: 7, lr: 1.00e-04, 2025-04-24 12:20:09,1772025-04-24 12:20:09,177 I2025-04-24 12:20:09,178 INFO [train.py:611] Epoch 1, batch 12550, loss[loss=0.001607, acc=0.6957, codec_acc=0.3909, codec_topk_acc=0.8909, codec_loss=0.001028, text_loss=0.0005783, over 2070.00 frames. ], tot_loss[loss=0.001788, acc=0.5645, codec_acc=0.4091, codec_topk_acc=0.8874, codec_loss=0.0009429, text_loss=0.0008453, over 44202025-04-24 12:20:342025-04-24 12:20:34,205 INFO [train.py:611] Epoch 1, batch 12600, loss[loss=0.001496, acc=0.6456, codec_acc=0.4143, codec_topk_acc=0.8956, codec_loss=0.0008895, text_loss=0.000606, over 2271.00 frames. ], tot_loss[loss=0.001795, acc=0.5591, codec_acc=0.4104, codec_topk_acc=0.8879, codec_loss=0.0009388, text_loss=0.0008558, over 441836.51 frames. ], batch s2025-04-24 12:20:59,588 INFO [train.py:611] Epoch 2025-04-24 12:20:59,588 INFO [train.py:611] Epoch 1, batch 12650, loss[loss=0.001706, acc=0.5704, codec_acc=0.3927, codec_topk_acc=0.8969, codec_loss=0.0008796, text_loss=0.0008261, over 2336.00 frames. ], tot_loss[loss=0.001796, acc=0.5614, codec_acc=0.4097, codec_topk_acc=0.888, codec_loss=0.0009426, text_loss=0.0008532, over 442025-04-24 12:21:34,602 INFO [train.py:611] Epoch 1, batch 12700, loss[loss=0.002203, acc=0.5341, codec_acc=0.4113, codec_topk_acc=0.8973, codec_loss=0.001074, text_loss=0.001129, over 1883.00 frames. ], tot_loss[loss=0.001806, acc=0.5513, codec_acc=0.4118, codec_topk_acc=0.8894, codec_loss=0.0009337, text_loss=0.0008723, over 442675.75 frames. ], batch size: 16, lr: 1.00e-04, 20252025-04-24 12:22:03,568 INFO [train.py:611] 2025-04-24 12:22:03,569 INFO [train.py:611] Epoch 1, batch 12750, loss[loss=0.00195, acc=0.5601, codec_acc=0.4024, codec_topk_acc=0.8853, codec_loss=0.001018, text_loss=0.0009317, over 2061.00 frames. ], tot_loss[loss=0.001796, acc=0.5615, codec_acc=0.4105, codec_topk_acc=0.8881, codec_loss=0.000943, text_loss=0.0008528, over 43952122025-04-242025-04-24 12:22:28,865 INFO [train.py:611] Epoch 1, batch 12800, loss[loss=0.001694, acc=0.528, codec_acc=0.4259, codec_topk_acc=0.895, codec_loss=0.0008559, text_loss=0.0008381, over 2352.00 frames. ], tot_loss[loss=0.001785, acc=0.5574, codec_acc=0.411, codec_topk_acc=0.8896, codec_loss=0.0009325, text_loss=0.0008522, over 442976.06 frames. ], batch size: 10, lr: 122025-04-24 12:23:02025-04-24 12:23:03,806 INFO [train.py:611] Epoch 1, batch 12850, loss[loss=0.001492, acc=0.6343, codec_acc=0.4069, codec_topk_acc=0.8956, codec_loss=0.0008384, text_loss=0.0006536, over 2454.00 frames. ], tot_loss[loss=0.001806, acc=0.5563, codec_acc=0.4117, codec_topk_acc=0.8889, codec_loss=0.0009424, text_loss=0.0008633, over 438831.51 frames. ], batch siz222025-04-24 12:23:282025-04-24 12:23:28,780 INFO [train.py:611] Epoch 1, batch 12900, loss[loss=0.001455, acc=0.6229, codec_acc=0.4422, codec_topk_acc=0.9025, codec_loss=0.0008177, text_loss=0.0006368, over 2415.00 frames. ], tot_loss[loss=0.001793, acc=0.5578, codec_acc=0.4125, codec_topk_acc=0.8894, codec_loss=0.0009368, text_loss=0.0008565, over 440627.56 frames. ], batch si2022025-04-24 12:23:54,897 INFO [train.py:611] Epoch 1, batch 12950, loss[loss=0.001606, acc=0.6065, codec_acc=0.4138, codec_topk_acc=0.888, codec_loss=0.0009051, text_loss=0.0007005, over 2285.00 frames. ], tot_loss[loss=0.001772, acc=0.5621, codec_acc=0.4103, codec_topk_acc=0.8888, codec_loss=0.000933, text_loss=0.0008393, over 443612.24 frames. ], batch size: 8, lr: 1.00e-020252025-04-24 12:24:29,2025-04-24 12:24:29,7552025-04-24 12:24:29,755 INFO [train.py:611] Epoch 1, batch 13000, loss[loss=0.001966, acc=0.4915, codec_acc=0.4218, codec_topk_acc=0.9056, codec_loss=0.0009178, text_loss=0.001048, over 2158.00 frames. ], tot_loss[loss=0.001792, acc=0.5586, codec_acc=0.4109, codec_topk_acc=0.8889, codec_loss=0.0009377, text_loss=0.0008543, over 4412025-04-24 12:24:59,907 INFO [train.py:611] Epoch 1, batch 13050, loss[loss=0.00196, acc=0.4932, codec_acc=0.4125, codec_topk_acc=0.8878, codec_loss=0.0009269, text_loss=0.001033, over 2250.00 frames. ], tot_loss[loss=0.001769, acc=0.5661, codec_acc=0.4118, codec_topk_acc=0.8894, codec_loss=0.0009311, text_loss=0.0008383, over 443308.10 frames. ], batch size: 4, lr: 1.00e-04, 2025-04-24 12:22025-04-24 12:25:24,743 INFO [train2025-04-24 12:25:24,744 INFO [train.py:611] Epoch 1, batch 13100, loss[loss=0.00157, acc=0.6517, codec_acc=0.3917, codec_topk_acc=0.8815, codec_loss=0.0009648, text_loss=0.0006053, over 2219.00 frames. ], tot_loss[loss=0.001779, acc=0.5583, codec_acc=0.4127, codec_topk_acc=0.8897, codec_loss=0.0009288, text_loss=0.0008507, over 2025-04-24 12:25:51,309 INFO [train.py:611] Epoch 1, batch 13150, loss[loss=0.00176, acc=0.5076, codec_acc=0.4206, codec_topk_acc=0.8976, codec_loss=0.0008385, text_loss=0.0009219, over 2421.00 frames. ], tot_loss[loss=0.001777, acc=0.5593, codec_acc=0.4129, codec_topk_acc=0.8902, codec_loss=0.0009289, text_loss=0.000848, over 443030.66 frames. ], batch size: 11, lr: 1.00e-04, 2025-02025-04-24 12:26:16,314 INFO [train.py:611] Epo2025-04-24 12:26:16,314 INFO [train.py:611] Epoch 1, batch 13200, loss[loss=0.001983, acc=0.5284, codec_acc=0.4004, codec_topk_acc=0.8913, codec_loss=0.0009354, text_loss=0.001047, over 2218.00 frames. ], tot_loss[loss=0.00179, acc=0.5553, codec_acc=0.4124, codec_topk_acc=0.8899, codec_loss=0.0009309, text_loss=0.0008593, over2025-04-24 12:26:43,765 INFO [train.py:611] Epoch 1,2025-04-24 12:26:43,765 INFO [train.py:611] Epoch 1, batch 13250, loss[loss=0.001455, acc=0.6099, codec_acc=0.4137, codec_topk_acc=0.8834, codec_loss=0.0008464, text_loss=0.0006088, over 2444.00 frames. ], tot_loss[loss=0.001779, acc=0.5553, codec_acc=0.4132, codec_topk_acc=0.8907, codec_loss=0.0009245, text_loss=0.0008544, ov2025-02025-04-24 12:27:07,8982025-04-24 12:27:07,898 I2025-04-24 12:27:07,898 INFO [train.py:611] Epoch 1, batch 13300, loss[loss=0.001845, acc=0.5425, codec_acc=0.4179, codec_topk_acc=0.9039, codec_loss=0.0009036, text_loss=0.0009414, over 2208.00 frames. ], tot_loss[loss=0.001797, acc=0.5553, codec_acc=0.4126, codec_topk_acc=0.8903, codec_loss=0.0009324, text_loss=0.0008643, 22025-02025-04-24 12:27:37,712025-04-24 12:27:37,712 INF2025-04-24 12:27:37,712 INFO [train.py:611] Epoch 1, batch 13350, loss[loss=0.001534, acc=0.5911, codec_acc=0.4171, codec_topk_acc=0.8954, codec_loss=0.0008396, text_loss=0.0006941, over 2417.00 frames. ], tot_loss[loss=0.001799, acc=0.5536, codec_acc=0.4132, codec_topk_acc=0.8908, codec_loss=0.0009311, text_loss=0.0008683, 2025-2025-04-24 12:28:08,770 INFO [train.py:611] Epoch 12025-04-24 12:28:08,770 INFO [train.py:611] Epoch 1, batch 13400, loss[loss=0.001619, acc=0.5857, codec_acc=0.4234, codec_topk_acc=0.893, codec_loss=0.0008428, text_loss=0.000776, over 2406.00 frames. ], tot_loss[loss=0.001798, acc=0.5519, codec_acc=0.4136, codec_topk_acc=0.8912, codec_loss=0.0009275, text_loss=0.0008709, o2025-2025-04-24 12:28:43,2025-04-24 12:28:43,823 INFO [train.py:611] Epoch 1, batch 13450, loss[loss=0.00171, acc=0.6048, codec_acc=0.4256, codec_topk_acc=0.8923, codec_loss=0.0009088, text_loss=0.0008014, over 2207.00 frames. ], tot_loss[loss=0.001781, acc=0.556, codec_acc=0.4148, codec_topk_acc=0.8924, codec_loss=0.0009247, text_loss=0.0008562, over 442377.81 frames. ], batch2025-042025-04-24 12:29:12025-04-24 12:29:14,574 INFO [tr2025-04-24 12:29:14,574 INFO [train.py:611] Epoch 1, batch 13500, loss[loss=0.001698, acc=0.7951, codec_acc=0.392, codec_topk_acc=0.8613, codec_loss=0.001219, text_loss=0.0004792, over 1806.00 frames. ], tot_loss[loss=0.00179, acc=0.5579, codec_acc=0.4138, codec_topk_acc=0.8913, codec_loss=0.0009293, text_loss=0.0008604, 2025-042025-04-24 12:29:41,058 INFO [train.py:611] Epoch 1, batch 13550, loss[loss=0.001621, acc=0.5903, codec_acc=0.4169, codec_topk_acc=0.8992, codec_loss=0.000929, text_loss=0.000692, over 2166.00 frames. ], tot_loss[loss=0.001788, acc=0.5511, codec_acc=0.416, codec_topk_acc=0.8934, codec_loss=0.0009183, text_loss=0.0008697, over 443580.01 frames. ], batch size: 5, lr: 1.00e-2025-2025-04-24 12:30:08,52025-04-24 12:30:08,553 INFO [t2025-04-24 12:30:08,554 INFO [train.py:611] Epoch 1, batch 13600, loss[loss=0.001779, acc=0.5109, codec_acc=0.4163, codec_topk_acc=0.8939, codec_loss=0.0008649, text_loss=0.0009138, over 2340.00 frames. ], tot_loss[loss=0.001787, acc=0.5556, codec_acc=0.4158, codec_topk_acc=0.8927, codec_loss=0.0009236, text_loss=0.00086312025-2025-04-24 12:30:37,7842025-04-24 12:30:37,784 INFO [train.py:611] Epoch 1, batch 13650, loss[loss=0.002558, acc=0.5294, codec_acc=0.3905, codec_topk_acc=0.9013, codec_loss=0.001266, text_loss=0.001292, over 1637.00 frames. ], tot_loss[loss=0.001797, acc=0.5474, codec_acc=0.4163, codec_topk_acc=0.8938, codec_loss=0.0009206, text_loss=0.0008768, over 442330.59 frames. ], ba2025-2025-042025-04-24 12:31:11,975 INFO [train.py:611] Ep2025-04-24 12:31:11,975 INFO [train.py:611] Epoch 1, batch 13700, loss[loss=0.001868, acc=0.4853, codec_acc=0.4172, codec_topk_acc=0.8884, codec_loss=0.0008627, text_loss=0.001005, over 2372.00 frames. ], tot_loss[loss=0.001785, acc=0.5609, codec_acc=0.4154, codec_topk_acc=0.8924, codec_loss=0.0009285, text_loss=0.00085652025-04-24 12:31:37,330 INFO [train.py:611] Epoch 1, batch 2025-04-24 12:31:37,330 INFO [train.py:611] Epoch 1, batch 13750, loss[loss=0.001714, acc=0.4771, codec_acc=0.4379, codec_topk_acc=0.8928, codec_loss=0.0008248, text_loss=0.0008896, over 2411.00 frames. ], tot_loss[loss=0.001771, acc=0.564, codec_acc=0.416, codec_topk_acc=0.8933, codec_loss=0.000925, text_loss=0.0008463,2025-04-24 12:32:02,526 INFO [train.py:611] Epoch 1, batch 13800, loss[loss=0.001968, acc=0.47, codec_acc=0.4084, codec_topk_acc=0.8921, codec_loss=0.0008953, text_loss=0.001073, over 2302.00 frames. ], tot_loss[loss=0.001768, acc=0.5597, codec_acc=0.4156, codec_topk_acc=0.8931, codec_loss=0.0009208, text_loss=0.0008469, over 443170.78 frames. ], batch size: 9, lr: 1.00e-020252025-04-24 12:32:28,359 INFO [t2025-04-24 12:32:28,360 INFO [train.py:611] Epoch 1, batch 13850, loss[loss=0.001648, acc=0.5508, codec_acc=0.4067, codec_topk_acc=0.8878, codec_loss=0.0008604, text_loss=0.0007873, over 2397.00 frames. ], tot_loss[loss=0.001802, acc=0.545, codec_acc=0.4161, codec_topk_acc=0.894, codec_loss=0.0009154, text_loss=0.0008865, over 444614.25 frames.2025-2025-04-24 12:32:54,449 INFO [train.py:611] Epoch 1, batch 13900, loss[loss=0.001428, acc=0.6986, codec_acc=0.4062, codec_topk_acc=0.8959, codec_loss=0.0008857, text_loss=0.0005426, over 2298.00 frames. ], tot_loss[loss=0.001772, acc=0.5546, codec_acc=0.4172, codec_topk_acc=0.8943, codec_loss=0.0009145, text_loss=0.0008575, over 444198.54 frames. ], batch size: 5, lr: 1.00e-04, 2025-04-24 12:33:19,240 INFO [train.py:611] Epoch 1, batch 13950, loss[loss=0.001426, acc=0.7015, codec_acc=0.4253, codec_topk_acc=0.8913, codec_loss=0.0009052, text_loss=0.0005207, over 2287.00 frames. ], tot_loss[loss=0.001766, acc=0.555, codec_acc=0.4176, codec_topk_acc=0.8954, codec_loss=0.0009107, text_loss=0.0008548, over 444990.15 frames. ], batch size: 4, lr: 1.00e-04, 2025-04-24 12:33:49,550 INFO [train.py:611] Epoch 1, 2025-04-24 12:33:49,555 INFO [train.py:611] Epoch 1, batch 14000, loss[loss=0.001898, acc=0.4739, codec_acc=0.4209, codec_topk_acc=0.8968, codec_loss=0.0008502, text_loss=0.001048, over 2356.00 frames. ], tot_loss[loss=0.001788, acc=0.552, codec_acc=0.4166, codec_topk_acc=0.8945, codec_loss=0.0009162, text_loss=0.0008721, ov2022025-04-24 12:34:14,414 INFO [train.py:611] Epoch 1, batch 14050, loss[loss=0.002067, acc=0.5133, codec_acc=0.4197, codec_topk_acc=0.8982, codec_loss=0.00097, text_loss=0.001097, over 2034.00 frames. ], tot_loss[loss=0.00177, acc=0.5573, codec_acc=0.4171, codec_topk_acc=0.8947, codec_loss=0.0009157, text_loss=0.0008538, over 443192.06 frames. ], batch size: 3, lr: 1.00e-042025-04-24 12:34:44,357 INFO [train.py:611] Epoch 1, bat2025-04-24 12:34:44,357 INFO [train.py:611] Epoch 1, batch 14100, loss[loss=0.001788, acc=0.5406, codec_acc=0.4032, codec_topk_acc=0.885, codec_loss=0.0009036, text_loss=0.0008845, over 2334.00 frames. ], tot_loss[loss=0.00177, acc=0.5577, codec_acc=0.4173, codec_topk_acc=0.8951, codec_loss=0.0009125, text_loss=0.0008571, ov22025-04-24 12:35:14,438 INFO [train.py:611] Epoch 1, batch 14150, loss[loss=0.001565, acc=0.6209, codec_acc=0.4255, codec_topk_acc=0.8966, codec_loss=0.0008696, text_loss=0.0006956, over 2313.00 frames. ], tot_loss[loss=0.001769, acc=0.5611, codec_acc=0.418, codec_topk_acc=0.8952, codec_loss=0.0009168, text_loss=0.0008517, over 441886.43 frames. ], batch size: 4, lr: 1.00e-042022025-04-24 12:35:45,366 INFO [train.py:611] Epoch 1, batch 14200, loss[loss=0.001587, acc=0.7278, codec_acc=0.3984, codec_topk_acc=0.8534, codec_loss=0.001006, text_loss=0.0005818, over 2198.00 frames. ], tot_loss[loss=0.001773, acc=0.5598, codec_acc=0.4181, codec_topk_acc=0.8952, codec_loss=0.0009169, text_loss=0.0008557, over 441547.61 frames. ], batch size: 3, lr: 1.00e-2025-2025-04-24 12:36:10,066 INFO [train.py:611] Epoch 1, batch 14250, loss[loss=0.001661, acc=0.5391, codec_acc=0.4167, codec_topk_acc=0.9018, codec_loss=0.0008722, text_loss=0.0007883, over 2289.00 frames. ], tot_loss[loss=0.001773, acc=0.5612, codec_acc=0.4176, codec_topk_acc=0.8953, codec_loss=0.0009174, text_loss=0.0008554, over 441513.38 frames. ], batch size: 8, lr: 1.00e2025-04-24 12:36:46,716 INFO [train.py:611] Epoch 1, batch 14300, loss[loss=0.001741, acc=0.6494, codec_acc=0.4311, codec_topk_acc=0.8951, codec_loss=0.0009285, text_loss=0.0008121, over 2155.00 frames. ], tot_loss[loss=0.001763, acc=0.5615, codec_acc=0.4176, codec_topk_acc=0.8949, codec_loss=0.0009146, text_loss=0.0008486, over 443119.55 frames. ], batch size: 7, lr: 1.00e-04, 2025-04-24 12:37:17,162 INFO [train.py:611] Epoch 1, batch 14350, loss[loss=0.001588, acc=0.584, codec_acc=0.4191, codec_topk_acc=0.8941, codec_loss=0.0008567, text_loss=0.0007311, over 2384.00 frames. ], tot_loss[loss=0.001753, acc=0.5617, codec_acc=0.4178, codec_topk_acc=0.8956, codec_loss=0.0009096, text_loss=0.0008435, over 445004.91 frames. ], batch size: 8, lr: 1.00e-04, 2025-04-24 12:37:42,050 INFO [train.py:611] Epoch 1, batch 2025-04-24 12:37:42,051 INFO [train.py:611] Epoch 1, batch 14400, loss[loss=0.001705, acc=0.5579, codec_acc=0.4212, codec_topk_acc=0.9048, codec_loss=0.0008998, text_loss=0.0008053, over 2201.00 frames. ], tot_loss[loss=0.001769, acc=0.559, codec_acc=0.4165, codec_topk_acc=0.8951, codec_loss=0.000916, text_loss=0.000852025-04-24 12:38:07,668 INFO [train.py:611] Epoch 1, batch 12025-04-24 12:38:07,669 INFO [train.py:611] Epoch 1, batch 14450, loss[loss=0.002044, acc=0.7059, codec_acc=0.3601, codec_topk_acc=0.8645, codec_loss=0.001356, text_loss=0.0006877, over 1675.00 frames. ], tot_loss[loss=0.001767, acc=0.56, codec_acc=0.4163, codec_topk_acc=0.8949, codec_loss=0.0009166, text_loss=0.00085082025-2025-04-24 12:38:38,252 INFO [train.py:611] Epoch 1, batch 14500, loss[loss=0.001766, acc=0.5065, codec_acc=0.4172, codec_topk_acc=0.9032, codec_loss=0.0008375, text_loss=0.0009283, over 2367.00 frames. ], tot_loss[loss=0.001792, acc=0.5523, codec_acc=0.4171, codec_topk_acc=0.8949, codec_loss=0.0009181, text_loss=0.0008738, over 442086.65 frames. ], batch size: 9, lr: 1.00e2025-02025-04-24 12:39:02,735 INFO [train.py:611] Epoch 1, batch 14550, loss[loss=0.001734, acc=0.5518, codec_acc=0.4292, codec_topk_acc=0.8993, codec_loss=0.0008558, text_loss=0.000878, over 2302.00 frames. ], tot_loss[loss=0.001787, acc=0.5522, codec_acc=0.417, codec_topk_acc=0.8946, codec_loss=0.0009161, text_loss=0.0008705, over 443199.93 frames. ], batch size: 14, lr: 1.00e-2025-2025-04-24 12:39:27,122 INFO [train.py:611] Epoch 1, batch 14600, loss[loss=0.001984, acc=0.4407, codec_acc=0.4292, codec_topk_acc=0.9076, codec_loss=0.0008678, text_loss=0.001116, over 2243.00 frames. ], tot_loss[loss=0.001784, acc=0.5537, codec_acc=0.4167, codec_topk_acc=0.895, codec_loss=0.0009163, text_loss=0.0008675, over 442830.20 frames. ], batch size: 10, lr: 1.00e-2025-2025-04-24 12:39:54,315 INFO [train.py:611] Epoch 1, batch 14650, loss[loss=0.001973, acc=0.5188, codec_acc=0.4064, codec_topk_acc=0.887, codec_loss=0.0009598, text_loss=0.001013, over 2147.00 frames. ], tot_loss[loss=0.001794, acc=0.5506, codec_acc=0.4178, codec_topk_acc=0.8957, codec_loss=0.0009156, text_loss=0.000878, over 442016.53 frames. ], batch size: 6, lr: 1.00e2025-02025-04-24 12:40:23,730 INFO [train.py:611] Epoch 1, batch 14700, loss[loss=0.001745, acc=0.5208, codec_acc=0.4206, codec_topk_acc=0.899, codec_loss=0.0009032, text_loss=0.0008417, over 2226.00 frames. ], tot_loss[loss=0.001794, acc=0.5508, codec_acc=0.4187, codec_topk_acc=0.8962, codec_loss=0.0009151, text_loss=0.0008785, over 441279.15 frames. ], batch size: 9, lr: 1.00e-2025-04-24 12:40:47,918 INFO [train.py:611] Epoch 1, batch 14750, loss[loss=0.001481, acc=0.7007, codec_acc=0.4233, codec_topk_acc=0.8935, codec_loss=0.0009313, text_loss=0.0005495, over 2165.00 frames. ], tot_loss[loss=0.001765, acc=0.5585, codec_acc=0.418, codec_topk_acc=0.8959, codec_loss=0.0009132, text_loss=0.0008517, over 442708.35 frames. ], batch size: 5, lr: 1.00e-04, 2022025-04-24 12:41:12,547 INFO [train.py:611] Epoch 1, batch 14800, loss[loss=0.002007, acc=0.6438, codec_acc=0.409, codec_topk_acc=0.9015, codec_loss=0.001131, text_loss=0.0008761, over 1821.00 frames. ], tot_loss[loss=0.001781, acc=0.5583, codec_acc=0.4183, codec_topk_acc=0.8961, codec_loss=0.0009174, text_loss=0.0008633, over 440582.30 frames. ], batch size: 2, lr: 1.00e-042025-04-24 12:41:48,168 INFO [train.py:611] Epoch 1, batch 14850, loss[loss=0.001563, acc=0.5613, codec_acc=0.4294, codec_topk_acc=0.9001, codec_loss=0.0008219, text_loss=0.0007414, over 2392.00 frames. ], tot_loss[loss=0.001759, acc=0.5657, codec_acc=0.4187, codec_topk_acc=0.8956, codec_loss=0.0009182, text_loss=0.0008403, over 440232.78 frames. ], batch size: 9, lr: 1.00e-04, 2025-04-24 12:42:18,274 INF2025-04-24 12:42:18,274 INFO [train.py:611] Epoch 1, batch 14900, loss[loss=0.001601, acc=0.582, codec_acc=0.4661, codec_topk_acc=0.9196, codec_loss=0.0007989, text_loss=0.0008025, over 2297.00 frames. ], tot_loss[loss=0.001762, acc=0.5622, codec_acc=0.4187, codec_topk_acc=0.8965, codec_loss=0.0009152, text_loss=0.000847, over 441272.72 frames. ], b2025-04-24 12:42:42,371 INFO [train.py:611] Epoch 1, batch 14950, loss[loss=0.002534, acc=0.4955, codec_acc=0.4256, codec_topk_acc=0.8965, codec_loss=0.001191, text_loss=0.001343, over 1675.00 frames. ], tot_loss[loss=0.00177, acc=0.5639, codec_acc=0.4183, codec_topk_acc=0.896, codec_loss=0.0009186, text_loss=0.0008518, over 440089.53 frames. ], batch size: 2, lr: 1.00e-04, 2025-02025-04-24 12:43:06,172 INFO [train.py:539] Computing validation2025-04-24 12:43:10,047 INFO [tra2025-04-24 12:43:10,047 INFO [train.py:548] Epoch 1, validation: loss=0.001835, acc=0.5741, codec_acc=0.4185, codec_topk_acc=0.8973, codec_loss=0.0009616, text_loss=0.2025-04-24 12:43:10,048 INFO [tra2025-04-24 12:43:10,048 INFO [train.py:549] Maximum me2025-04-24 12:43:23,657 INFO [train.py:611] Epoch 1, batch 15000, loss[loss=0.002243, acc=0.5045, codec_acc=0.4248, codec_topk_acc=0.8929, codec_loss=0.001062, text_loss=0.001181, over 1935.00 frames. ], tot_loss[loss=0.00177, acc=0.566, codec_acc=0.4178, codec_topk_acc=0.8952, codec_loss=0.0009209, text_loss=0.0008487, over 439977.15 frames. ], batch size: 3, lr: 1.00e-04, 2025-04-24 12:43:48,685 INFO [train.py:611] Epoch 1, batch 15050, 2025-04-24 12:43:48,685 INFO [train.py:611] Epoch 1, batch 15050, loss[loss=0.001933, acc=0.5179, codec_acc=0.4368, codec_topk_acc=0.9076, codec_loss=0.0009532, text_loss=0.0009794, over 2043.00 frames. ], tot_loss[loss=0.001768, acc=0.5591, codec_acc=0.4184, codec_topk_acc=0.8972, codec_loss=0.0009125, text_loss2022025-04-24 12:44:23,501 INFO [train.py:611] Epoch 1, batch 15100, loss[loss=0.001325, acc=0.7424, codec_acc=0.4305, codec_topk_acc=0.9079, codec_loss=0.00085, text_loss=0.0004747, over 2295.00 frames. ], tot_loss[loss=0.00178, acc=0.5592, codec_acc=0.4187, codec_topk_acc=0.896, codec_loss=0.0009197, text_loss=0.0008601, over 439321.09 frames. ], batch size: 3, lr: 1.00e-04, 22025-04-24 12:44:54,934 INFO [train.py:611] Epoch 1, batch 15150, loss[loss=0.001668, acc=0.5543, codec_acc=0.4368, codec_topk_acc=0.905, codec_loss=0.0008497, text_loss=0.0008184, over 2274.00 frames. ], tot_loss[loss=0.001778, acc=0.5567, codec_acc=0.4192, codec_topk_acc=0.8968, codec_loss=0.0009139, text_loss=0.0008642, over 441199.65 frames. ], batch size: 11, lr: 1.00e-04, 2025-04-24 12:45:19,441 INFO [train.py:611] Epoch 1, batch 15200, loss[loss=0.001587, acc=0.5209, codec_acc=0.427, codec_topk_acc=0.9044, codec_loss=0.000809, text_loss=0.0007776, over 2453.00 frames. ], tot_loss[loss=0.001767, acc=0.5581, codec_acc=0.4199, codec_topk_acc=0.8977, codec_loss=0.0009103, text_loss=0.0008565, over 441624.87 frames. ], batch size: 10, lr: 1.00e-04, 22025-04-24 12:45:44,616 INFO [train.p2025-04-24 12:45:44,616 INFO [tra2025-04-24 12:45:44,616 INFO [train.py:611] Epoch 1, batch 15250, loss[loss=0.001397, acc=0.6458, codec_acc=0.4424, codec_topk_acc=0.9111, codec_loss=0.0008332, text_loss=0.0005641, over 2280.00 frames. ], tot_loss[loss=0.001776, acc=0.5531, codec_acc=0.4205, codec_topk_acc=0.8978, codec_loss=0.0009081, text2025-04-24 12:46:10,205 INFO [train.py:611] Epoch 1, batch 15300, loss[lo2025-04-24 12:46:10,205 INFO [train.py:611] Epoch 1, batch 15300, loss[loss=0.001631, acc=0.5139, codec_acc=0.4251, codec_topk_acc=0.914, codec_loss=0.0008036, text_loss=0.0008278, over 2394.00 frames. ], tot_loss[loss=0.001766, acc=0.5497, codec_acc=0.4211, codec_topk_acc=0.8989, codec_loss=0.0008992, t2022025-04-24 12:46:35,286 INFO [train.py:611] Epoch 1, batch 15350, loss[loss=0.002305, acc=0.4931, codec_acc=0.4289, codec_topk_acc=0.8975, codec_loss=0.001059, text_loss=0.001246, over 1872.00 frames. ], tot_loss[loss=0.00174, acc=0.5598, codec_acc=0.4223, codec_topk_acc=0.898, codec_loss=0.0008986, text_loss=0.0008412, over 445795.64 frames. ], batch size: 16, lr: 1.00e-04, 22025-04-24 12:47:00,258 INFO [train.py:611] Epoch 1, batch 15400, loss[loss=2025-04-24 12:47:00,258 INFO [train.py:611] Epoch 1, batch 15400, loss[loss=0.00184, acc=0.6889, codec_acc=0.4684, codec_topk_acc=0.9274, codec_loss=0.00104, text_loss=0.0007995, over 1697.00 frames. ], tot_loss[loss=0.001756, acc=0.555, codec_acc=0.4227, codec_topk_acc=0.8992, codec_loss=0.0008991, te2025-04-2202025-04-24 12:47:30,243 INFO [train.py:611] Epoch 1, batch 15450, loss[loss=0.001768, acc=0.5133, codec_acc=0.4349, codec_topk_acc=0.9012, codec_loss=0.0008503, text_loss=0.000918, over 2304.00 frames. ], tot_loss[loss=0.001754, acc=0.5583, codec_acc=0.4224, codec_topk_acc=0.8987, codec_loss=0.000899, text_loss=0.0008553, over 444775.17 frames. ], batch size: 9, lr:2020252025-04-24 12:48:00,916 INFO [train.py:611] Epoch 1, batch 15500, loss[loss=0.001675, acc=0.5556, codec_acc=0.4632, codec_topk_acc=0.9147, codec_loss=0.0007906, text_loss=0.0008841, over 2293.00 frames. ], tot_loss[loss=0.001743, acc=0.5599, codec_acc=0.4217, codec_topk_acc=0.8989, codec_loss=0.0009007, text_loss=0.0008423, over 444479.76 frames. ], batch size: 5, lr: 1.022025-042025-04-24 12:48:25,810 INFO [train.py:611] Epoch 1, batch 15550, loss[2025-04-24 12:48:25,811 INFO [train.py:611] Epoch 1, batch 15550, loss[loss=0.002074, acc=0.6182, codec_acc=0.4205, codec_topk_acc=0.9084, codec_loss=0.001167, text_loss=0.0009069, over 1701.00 frames. ], tot_loss[loss=0.001767, acc=0.5516, codec_acc=0.4231, codec_topk_acc=0.8996, codec_loss=0.0008982025-04-24 12:48:56,440 INFO [train.py:611] Epoch 1, batch 15600, loss[loss=0.002025-04-24 12:48:56,440 INFO [train.py:611] Epoch 1, batch 15600, loss[loss=0.001767, acc=0.7325, codec_acc=0.4067, codec_topk_acc=0.8936, codec_loss=0.001106, text_loss=0.0006611, over 1834.00 frames. ], tot_loss[loss=0.00177, acc=0.5528, codec_acc=0.4234, codec_topk_acc=0.8999, codec_loss=0.00090022025-04-24 12:49:21,502 INFO [train.py:611] Epoch 1, batch 15650, loss[loss=0.001633, acc=0.5547, codec_acc=0.4402, codec_topk_acc=0.9167, codec_loss=0.0008529, text_loss=0.0007804, over 2217.00 frames. ], tot_loss[loss=0.001737, acc=0.5576, codec_acc=0.4242, codec_topk_acc=0.9002, codec_loss=0.0008924, text_loss=0.0008449, over 445891.60 frames. ], batch size: 6, lr: 1.00e-04,20202025-04-24 12:49:56,681 INFO [train.py:611] Epoch 1, batch 15700, loss[loss=0.001326, acc=0.7181, codec_acc=0.4232, codec_topk_acc=0.8992, codec_loss=0.000812, text_loss=0.0005145, over 2398.00 frames. ], tot_loss[loss=0.00173, acc=0.5624, codec_acc=0.4243, codec_topk_acc=0.9005, codec_loss=0.0008943, text_loss=0.0008359, over 444894.70 frames. ], batch size: 4, lr: 1.00e-04, text_loss=0.000855, over 445067.19 frames. ], batch size: 4, lr: 1.00e-04, 2025-04-24 12:50:19,817 INFO [train.py:940] Saving batch to slam_omni/exp_speech2speech_rerun/batch-bdd640fb-0667-1ad1-1c80-317fa3b1799d.pt 2025-04-24 12:50:19,841 INFO [train.py:945] features shape: torch.Size([6, 721, 80]) ss=0.001076, text_loss=0.001592, over 1833.00 frames. ], tot_loss[loss=0.00176, acc=0.5528, codec_acc=0.4254, codec_topk_acc=0.9006, codec_loss=0.0008946, text_loss=0.0008655, over 444031.49 frames. ], batch size: 3, lr: 1.00e-04,