Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
09/17/2024 17:17:16 - WARNING - __main__ - Process rank: 0, device: cuda:0, n_gpu: 1, distributed training: False, 16-bits training: False
09/17/2024 17:17:16 - INFO - __main__ - Training/evaluation parameters DistillationTrainingArguments(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
batch_eval_metrics=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=8,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=7200,
debug=[],
deepspeed=None,
disable_tqdm=False,
dispatch_batches=None,
do_eval=True,
do_predict=False,
do_train=True,
dtype=bfloat16,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_on_start=False,
eval_steps=1000.0,
eval_strategy=no,
eval_use_gather_object=False,
evaluation_strategy=None,
fp16=False,
fp16_backend=auto,
fp16_full_eval=False,
fp16_opt_level=O1,
freeze_encoder=True,
fsdp=[],
fsdp_config={'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False},
fsdp_min_num_params=0,
fsdp_transformer_layer_cls_to_wrap=None,
full_determinism=False,
generation_config=None,
generation_max_length=None,
generation_num_beams=None,
gradient_accumulation_steps=1,
gradient_checkpointing=True,
gradient_checkpointing_kwargs=None,
greater_is_better=None,
group_by_length=False,
half_precision_backend=auto,
hub_always_push=False,
hub_model_id=None,
hub_private_repo=False,
hub_strategy=every_save,
hub_token=<HUB_TOKEN>,
ignore_data_skip=False,
include_inputs_for_metrics=False,
include_num_input_tokens_seen=False,
include_tokens_per_second=False,
jit_mode_eval=False,
kl_weight=1.0,
label_names=None,
label_smoothing_factor=0.0,
learning_rate=0.0001,
length_column_name=length,
load_best_model_at_end=False,
local_rank=0,
log_level=passive,
log_level_replica=warning,
log_on_each_node=True,
logging_dir=./runs/Sep17_17-17-13_22c57e4734ce,
logging_first_step=False,
logging_nan_inf_filter=True,
logging_steps=25,
logging_strategy=steps,
lr_scheduler_kwargs={},
lr_scheduler_type=constant_with_warmup,
max_grad_norm=1.0,
max_steps=5000,
metric_for_best_model=None,
mp_parameters=,
neftune_noise_alpha=None,
no_cuda=False,
num_train_epochs=3.0,
optim=adamw_torch,
optim_args=None,
optim_target_modules=None,
output_dir=./,
overwrite_output_dir=True,
past_index=-1,
per_device_eval_batch_size=32,
per_device_train_batch_size=32,
predict_with_generate=True,
prediction_loss_only=False,
push_to_hub=True,
push_to_hub_model_id=None,
push_to_hub_organization=None,
push_to_hub_token=<PUSH_TO_HUB_TOKEN>,
ray_scope=last,
remove_unused_columns=True,
report_to=['tensorboard'],
restore_callback_states_from_checkpoint=False,
resume_from_checkpoint=None,
run_name=./,
save_on_each_node=False,
save_only_model=False,
save_safetensors=True,
save_steps=1000,
save_strategy=steps,
save_total_limit=1,
seed=42,
skip_memory_metrics=True,
sortish_sampler=False,
split_batches=None,
temperature=2.0,
tf32=None,
torch_compile=False,
torch_compile_backend=None,
torch_compile_mode=None,
torch_empty_cache_steps=None,
torchdynamo=None,
tpu_metrics_debug=False,
tpu_num_cores=None,
use_cpu=False,
use_ipex=False,
use_legacy_prediction_loop=False,
use_mps_device=False,
warmup_ratio=0.0,
warmup_steps=50,
weight_decay=0.0,
)
/root/anaconda/envs/distil-whisper/lib/python3.9/site-packages/huggingface_hub/utils/_deprecation.py:131: FutureWarning: 'Repository' (from 'huggingface_hub.repository') is deprecated and will be removed from version '1.0'. Please prefer the http-based alternatives instead. Given its large adoption in legacy code, the complete removal is only planned on next major release. For more details, please read https://huggingface.co/docs/huggingface_hub/concepts/git_vs_http.
  warnings.warn(warning_message, FutureWarning)
/root/distil-whisper-large-v3-ptbr/./ is already a clone of https://huggingface.co/freds0/distil-whisper-large-v3-ptbr. Make sure you pull the latest changes with `repo.git_pull()`.
09/17/2024 17:17:17 - WARNING - huggingface_hub.repository - /root/distil-whisper-large-v3-ptbr/./ is already a clone of https://huggingface.co/freds0/distil-whisper-large-v3-ptbr. Make sure you pull the latest changes with `repo.git_pull()`.
Combining datasets...:   0%|          | 0/2 [00:00<?, ?it/s]
Combining datasets...:   0%|          | 0/2 [00:04<?, ?it/s]
Traceback (most recent call last):
  File "/root/distil-whisper-large-v3-ptbr/run_distillation.py", line 1632, in <module>
    main()
  File "/root/distil-whisper-large-v3-ptbr/run_distillation.py", line 799, in main
    raw_datasets["train"] = load_multiple_datasets(
  File "/root/distil-whisper-large-v3-ptbr/run_distillation.py", line 594, in load_multiple_datasets
    dataset = dataset.cast_column("audio", datasets.features.Audio(sampling_rate))
  File "/root/anaconda/envs/distil-whisper/lib/python3.9/site-packages/datasets/fingerprint.py", line 442, in wrapper
    out = func(dataset, *args, **kwargs)
  File "/root/anaconda/envs/distil-whisper/lib/python3.9/site-packages/datasets/arrow_dataset.py", line 2096, in cast_column
    dataset._data = dataset._data.cast(dataset.features.arrow_schema)
  File "/root/anaconda/envs/distil-whisper/lib/python3.9/site-packages/datasets/table.py", line 1577, in cast
    table = table_cast(self.table, target_schema, *args, **kwargs)
  File "/root/anaconda/envs/distil-whisper/lib/python3.9/site-packages/datasets/table.py", line 2283, in table_cast
    return cast_table_to_schema(table, schema)
  File "/root/anaconda/envs/distil-whisper/lib/python3.9/site-packages/datasets/table.py", line 2237, in cast_table_to_schema
    raise CastError(
datasets.table.CastError: Couldn't cast
filename: struct<bytes: binary, path: string>
  child 0, bytes: binary
  child 1, path: string
duration: double
transcript: string
transcript_mms: string
levenshtein: double
client_id: int64
filesize: int64
num_words: int64
-- schema metadata --
huggingface: '{"info": {"features": {"filename": {"sampling_rate": 24000,' + 390
to
{'filename': Audio(sampling_rate=24000, mono=True, decode=True, id=None), 'duration': Value(dtype='float64', id=None), 'transcript': Value(dtype='string', id=None), 'transcript_mms': Value(dtype='string', id=None), 'levenshtein': Value(dtype='float64', id=None), 'client_id': Value(dtype='int64', id=None), 'filesize': Value(dtype='int64', id=None), 'num_words': Value(dtype='int64', id=None), 'audio': Audio(sampling_rate=16000, mono=True, decode=True, id=None)}
because column names don't match
Traceback (most recent call last):
"/root/anaconda/envs/distil-whisper/bin/accelerate", line 8, in <module> sys.exit(main()) File "/root/anaconda/envs/distil-whisper/lib/python3.9/site-packages/accelerate/commands/accelerate_cli.py", line 48, in main args.func(args) File "/root/anaconda/envs/distil-whisper/lib/python3.9/site-packages/accelerate/commands/launch.py", line 1174, in launch_command simple_launcher(args) File "/root/anaconda/envs/distil-whisper/lib/python3.9/site-packages/accelerate/commands/launch.py", line 769, in simple_launcher raise subprocess.CalledProcessError(returncode=process.returncode, cmd=cmd) subprocess.CalledProcessError: Command '['/root/anaconda/envs/distil-whisper/bin/python', 'run_distillation.py', '--model_name_or_path', './distil-large-v3-init', '--teacher_model_name_or_path', 'openai/whisper-large-v3', '--train_dataset_name', 'freds0/cml_tts_dataset_portuguese+freds0/cml_tts_dataset_portuguese', '--train_split_name', 'train+test', '--train_dataset_config_name', 'default+default', '--text_column_name', 'transcript+transcript', '--eval_dataset_name', 'freds0/cml_tts_dataset_portuguese', '--eval_text_column_name', 'transcript', '--eval_steps', '1000', '--save_steps', '1000', '--warmup_steps', '50', '--learning_rate', '0.0001', '--lr_scheduler_type', 'constant_with_warmup', '--timestamp_probability', '0.2', '--condition_on_prev_probability', '0.2', '--language', 'pl', '--task', 'transcribe', '--logging_steps', '25', '--save_total_limit', '1', '--max_steps', '5000', '--wer_threshold', '20', '--per_device_train_batch_size', '32', '--per_device_eval_batch_size', '32', '--dataloader_num_workers', '8', '--preprocessing_num_workers', '8', '--ddp_timeout', '7200', '--dtype', 'bfloat16', '--output_dir', './', '--do_train', '--do_eval', '--gradient_checkpointing', '--overwrite_output_dir', '--predict_with_generate', '--freeze_encoder', '--streaming', 'False', '--push_to_hub']' returned non-zero exit status 1. |