Monda committed on
Commit
0c3e810
·
verified ·
1 Parent(s): b750b1c

Training in progress, step 5

Browse files
config.json CHANGED
@@ -4,6 +4,8 @@
4
  ],
5
  "attention_probs_dropout_prob": 0.1,
6
  "classifier_dropout": null,
 
 
7
  "hidden_act": "gelu",
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 768,
@@ -25,11 +27,16 @@
25
  "num_attention_heads": 12,
26
  "num_hidden_layers": 12,
27
  "pad_token_id": 0,
 
 
 
 
 
28
  "position_embedding_type": "absolute",
29
  "problem_type": "single_label_classification",
30
  "torch_dtype": "float32",
31
  "transformers_version": "4.51.1",
32
  "type_vocab_size": 2,
33
  "use_cache": true,
34
- "vocab_size": 64000
35
  }
 
4
  ],
5
  "attention_probs_dropout_prob": 0.1,
6
  "classifier_dropout": null,
7
+ "directionality": "bidi",
8
+ "gradient_checkpointing": false,
9
  "hidden_act": "gelu",
10
  "hidden_dropout_prob": 0.1,
11
  "hidden_size": 768,
 
27
  "num_attention_heads": 12,
28
  "num_hidden_layers": 12,
29
  "pad_token_id": 0,
30
+ "pooler_fc_size": 768,
31
+ "pooler_num_attention_heads": 12,
32
+ "pooler_num_fc_layers": 3,
33
+ "pooler_size_per_head": 128,
34
+ "pooler_type": "first_token_transform",
35
  "position_embedding_type": "absolute",
36
  "problem_type": "single_label_classification",
37
  "torch_dtype": "float32",
38
  "transformers_version": "4.51.1",
39
  "type_vocab_size": 2,
40
  "use_cache": true,
41
+ "vocab_size": 100000
42
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bb3ddf86caa21f3019bf06d62cead356d864dd86b7624d3917d1d35ff5472aaa
3
- size 540806148
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1540b5c8d98dbc888bea062c8a98bc90e553852a900aa688729acce6bb2ad191
3
+ size 651398148
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:585afd9a6a00cf30824acdea0fa8c837628db4dea4440a72491245a9256a9676
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:940da1f61ec8940f29927d22dbc004e07e619075dea48dab349f6d1922bc0386
3
  size 5304
wandb/debug.log CHANGED
@@ -1,39 +1,21 @@
1
- 2025-05-03 21:17:45,614 INFO MainThread:1653 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
2
- 2025-05-03 21:17:45,614 INFO MainThread:1653 [wandb_setup.py:_flush():68] Configure stats pid to 1653
3
- 2025-05-03 21:17:45,614 INFO MainThread:1653 [wandb_setup.py:_flush():68] Loading settings from /root/.config/wandb/settings
4
- 2025-05-03 21:17:45,614 INFO MainThread:1653 [wandb_setup.py:_flush():68] Loading settings from /kaggle/working/wandb/settings
5
- 2025-05-03 21:17:45,614 INFO MainThread:1653 [wandb_setup.py:_flush():68] Loading settings from environment variables
6
- 2025-05-03 21:17:45,614 INFO MainThread:1653 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /kaggle/working/wandb/run-20250503_211745-af8yueej/logs/debug.log
7
- 2025-05-03 21:17:45,614 INFO MainThread:1653 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /kaggle/working/wandb/run-20250503_211745-af8yueej/logs/debug-internal.log
8
- 2025-05-03 21:17:45,614 INFO MainThread:1653 [wandb_init.py:monkeypatch_ipython():589] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7c4809abe010>
9
- 2025-05-03 21:17:45,615 INFO MainThread:1653 [wandb_init.py:init():756] calling init triggers
10
- 2025-05-03 21:17:45,615 INFO MainThread:1653 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
11
  config: {'_wandb': {}}
12
- 2025-05-03 21:17:45,615 INFO MainThread:1653 [wandb_init.py:init():789] starting backend
13
- 2025-05-03 21:17:45,825 INFO MainThread:1653 [wandb_init.py:init():793] sending inform_init request
14
- 2025-05-03 21:17:45,830 INFO MainThread:1653 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
15
- 2025-05-03 21:17:45,830 INFO MainThread:1653 [wandb_init.py:init():808] backend started and connected
16
- 2025-05-03 21:17:45,837 INFO MainThread:1653 [wandb_run.py:_label_probe_notebook():1196] probe notebook
17
- 2025-05-03 21:17:51,105 INFO MainThread:1653 [wandb_init.py:init():901] updated telemetry
18
- 2025-05-03 21:17:51,106 INFO MainThread:1653 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
19
- 2025-05-03 21:17:51,217 INFO MainThread:1653 [wandb_init.py:init():994] starting run threads in backend
20
- 2025-05-03 21:17:51,791 INFO MainThread:1653 [wandb_run.py:_console_start():2385] atexit reg
21
- 2025-05-03 21:17:51,792 INFO MainThread:1653 [wandb_run.py:_redirect():2235] redirect: wrap_raw
22
- 2025-05-03 21:17:51,793 INFO MainThread:1653 [wandb_run.py:_redirect():2300] Wrapping output streams.
23
- 2025-05-03 21:17:51,793 INFO MainThread:1653 [wandb_run.py:_redirect():2325] Redirects installed.
24
- 2025-05-03 21:17:51,797 INFO MainThread:1653 [wandb_init.py:init():1036] run started, returning control to user process
25
- 2025-05-03 21:17:51,797 INFO MainThread:1653 [jupyter.py:save_ipynb():386] not saving jupyter notebook
26
- 2025-05-03 21:17:51,797 INFO MainThread:1653 [wandb_init.py:_pause_backend():554] pausing backend
27
- 2025-05-03 21:17:51,807 INFO MainThread:1653 [wandb_init.py:_resume_backend():559] resuming backend
28
- 2025-05-03 21:17:52,024 INFO MainThread:1653 [jupyter.py:save_ipynb():386] not saving jupyter notebook
29
- 2025-05-03 21:17:52,025 INFO MainThread:1653 [wandb_init.py:_pause_backend():554] pausing backend
30
- 2025-05-03 21:17:52,029 INFO MainThread:1653 [wandb_init.py:_resume_backend():559] resuming backend
31
- 2025-05-03 21:17:52,061 INFO MainThread:1653 [jupyter.py:save_ipynb():386] not saving jupyter notebook
32
- 2025-05-03 21:17:52,061 INFO MainThread:1653 [wandb_init.py:_pause_backend():554] pausing backend
33
- 2025-05-03 21:17:58,421 INFO MainThread:1653 [wandb_init.py:_resume_backend():559] resuming backend
34
- 2025-05-03 21:17:58,882 INFO MainThread:1653 [jupyter.py:save_ipynb():386] not saving jupyter notebook
35
- 2025-05-03 21:17:58,882 INFO MainThread:1653 [wandb_init.py:_pause_backend():554] pausing backend
36
- 2025-05-03 21:17:58,886 INFO MainThread:1653 [wandb_init.py:_resume_backend():559] resuming backend
37
- 2025-05-03 21:17:59,283 INFO MainThread:1653 [wandb_run.py:_config_callback():1253] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'aubmindlab/bert-base-arabertv02', '_attn_implementation_autoset': True, 'transformers_version': '4.51.1', 'model_type': 'bert', 'vocab_size': 64000, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 
'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.2, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/May03_21-17-52_45a6a5d4a1e9', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 5, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 5, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'macro_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 
0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'tp_size': 0, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False}
38
- 2025-05-03 21:17:59,291 INFO MainThread:1653 [wandb_config.py:__setitem__():154] config set model/num_parameters = 135195651 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7c4809c1d790>>
39
- 2025-05-03 21:17:59,291 INFO MainThread:1653 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 135195651 None
 
1
+ 2025-05-04 14:00:29,421 INFO MainThread:31 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /kaggle/working/wandb/run-20250504_140029-gm7oud70/logs/debug.log
2
+ 2025-05-04 14:00:29,421 INFO MainThread:31 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /kaggle/working/wandb/run-20250504_140029-gm7oud70/logs/debug-internal.log
3
+ 2025-05-04 14:00:29,421 INFO MainThread:31 [wandb_init.py:init():756] calling init triggers
4
+ 2025-05-04 14:00:29,421 INFO MainThread:31 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
 
 
 
 
 
 
5
  config: {'_wandb': {}}
6
+ 2025-05-04 14:00:29,422 INFO MainThread:31 [wandb_init.py:init():781] wandb.init() called when a run is still active
7
+ 2025-05-04 14:00:29,422 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
8
+ 2025-05-04 14:00:29,422 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
9
+ 2025-05-04 14:00:30,369 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
10
+ 2025-05-04 14:00:31,453 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
11
+ 2025-05-04 14:00:31,453 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
12
+ 2025-05-04 14:00:31,458 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
13
+ 2025-05-04 14:00:31,490 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
14
+ 2025-05-04 14:00:31,490 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
15
+ 2025-05-04 14:00:32,913 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
16
+ 2025-05-04 14:00:33,197 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
17
+ 2025-05-04 14:00:33,197 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
18
+ 2025-05-04 14:00:33,202 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
19
+ 2025-05-04 14:00:33,664 INFO MainThread:31 [wandb_run.py:_config_callback():1253] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'UBC-NLP/MARBERT', '_attn_implementation_autoset': True, 'transformers_version': '4.51.1', 'directionality': 'bidi', 'gradient_checkpointing': False, 'model_type': 'bert', 'pooler_fc_size': 768, 'pooler_num_attention_heads': 12, 'pooler_num_fc_layers': 3, 'pooler_size_per_head': 128, 'pooler_type': 'first_token_transform', 'vocab_size': 100000, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 
'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.2, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/May04_14-00-31_40834be6d3ef', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 5, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 5, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 
'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'macro_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'tp_size': 0, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False}
20
+ 2025-05-04 14:00:33,670 INFO MainThread:31 [wandb_config.py:__setitem__():154] config set model/num_parameters = 162843651 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7f46cb2f3cd0>>
21
+ 2025-05-04 14:00:33,670 INFO MainThread:31 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 162843651 None
 
 
 
 
 
 
 
 
 
 
 
 
wandb/run-20250504_135537-fqirr53c/files/output.log ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
2
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
3
+ wandb: WARNING The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.
4
+ wandb: WARNING Calling wandb.login() after wandb.init() has no effect.
5
+ Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
6
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
7
+ wandb: WARNING Calling wandb.login() after wandb.init() has no effect.
8
+ Some weights of BertForSequenceClassification were not initialized from the model checkpoint at UBC-NLP/MARBERT and are newly initialized: ['classifier.bias', 'classifier.weight']
9
+ You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
10
+ /usr/local/lib/python3.11/dist-packages/torch/nn/parallel/_functions.py:71: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.
11
+ warnings.warn(
12
+ /usr/local/lib/python3.11/dist-packages/torch/nn/parallel/_functions.py:71: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.
13
+ warnings.warn(
14
+ /usr/local/lib/python3.11/dist-packages/torch/nn/parallel/_functions.py:71: UserWarning: Was asked to gather along dimension 0, but all input tensors were scalars; will instead unsqueeze and return a vector.
15
+ warnings.warn(
wandb/run-20250504_135537-fqirr53c/files/requirements.txt ADDED
@@ -0,0 +1,863 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fsspec==2024.12.0
2
+ bq_helper==0.4.1
3
+ nltk==3.9.1
4
+ regex==2024.11.6
5
+ joblib==1.4.2
6
+ click==8.1.8
7
+ tqdm==4.67.1
8
+ lightgbm==4.6.0
9
+ pytools==2025.1.2
10
+ pycuda==2025.1
11
+ torchtune==0.6.1
12
+ tbb==2022.1.0
13
+ mkl==2025.1.0
14
+ tbb4py==2022.1.0
15
+ shapely==2.1.0
16
+ libpysal==4.9.2
17
+ intel-cmplr-lib-ur==2024.2.0
18
+ intel-cmplr-lib-rt==2024.2.0
19
+ mkl-umath==0.1.1
20
+ mkl-service==2.4.1
21
+ mkl-random==1.2.4
22
+ numpy==1.26.4
23
+ intel-openmp==2024.2.0
24
+ mkl-fft==1.3.8
25
+ pynvjitlink-cu12==0.5.2
26
+ tblib==3.1.0
27
+ tsfresh==0.21.0
28
+ psutil==7.0.0
29
+ raft-dask-cu12==25.2.0
30
+ partd==1.4.2
31
+ treelite==4.4.1
32
+ dask==2024.12.1
33
+ cupy-cuda12x==13.4.1
34
+ pynvml==12.0.0
35
+ nvidia-ml-py==12.570.86
36
+ PyWavelets==1.8.0
37
+ nvidia-cusolver-cu12==11.7.3.90
38
+ ucx-py-cu12==0.42.0
39
+ attrs==25.3.0
40
+ libcudf-cu12==25.2.2
41
+ nvidia-nvcomp-cu12==4.2.0.11
42
+ numba-cuda==0.2.0
43
+ libcuml-cu12==25.2.1
44
+ typing_extensions==4.13.1
45
+ urllib3==2.3.0
46
+ msgpack==1.1.0
47
+ fastrlock==0.8.3
48
+ idna==3.10
49
+ libkvikio-cu12==25.2.1
50
+ libucx-cu12==1.18.0
51
+ distributed==2024.12.1
52
+ libcuvs-cu12==25.2.1
53
+ MarkupSafe==3.0.2
54
+ dask-cudf-cu12==25.2.2
55
+ charset-normalizer==3.4.1
56
+ dask-expr==1.1.21
57
+ nvidia-cusparse-cu12==12.5.8.93
58
+ nvidia-cublas-cu12==12.8.4.1
59
+ rich==14.0.0
60
+ dask-cuda==25.2.0
61
+ zict==3.0.0
62
+ toolz==1.0.0
63
+ nvidia-cudnn-cu12==9.3.0.75
64
+ cuml-cu12==25.2.1
65
+ pylibcudf-cu12==25.2.2
66
+ locket==1.0.0
67
+ scipy==1.15.2
68
+ graphviz==0.20.3
69
+ zipp==3.21.0
70
+ importlib_metadata==8.6.1
71
+ python-dateutil==2.9.0.post0
72
+ certifi==2025.1.31
73
+ markdown-it-py==3.0.0
74
+ tzdata==2025.2
75
+ tabulate==0.9.0
76
+ statsmodels==0.14.4
77
+ mdurl==0.1.2
78
+ patsy==1.0.1
79
+ scikit-learn==1.2.2
80
+ six==1.17.0
81
+ packaging==24.2
82
+ pylibraft-cu12==25.2.0
83
+ rapids-dask-dependency==25.2.0
84
+ numba==0.60.0
85
+ cloudpickle==3.1.1
86
+ nvtx==0.2.11
87
+ threadpoolctl==3.6.0
88
+ cudf-cu12==25.2.2
89
+ cuda-bindings==12.8.0
90
+ llvmlite==0.43.0
91
+ pandas==2.2.3
92
+ requests==2.32.3
93
+ eli5==0.13.0
94
+ Pygments==2.19.1
95
+ pytz==2025.2
96
+ cachetools==5.5.2
97
+ Jinja2==3.1.6
98
+ rmm-cu12==25.2.0
99
+ nvidia-cufft-cu12==11.3.3.83
100
+ cuda-python==12.8.0
101
+ libucxx-cu12==0.42.0
102
+ PyYAML==6.0.2
103
+ tornado==6.4.2
104
+ category_encoders==2.7.0
105
+ stumpy==1.13.0
106
+ nvidia-nvjitlink-cu12==12.8.93
107
+ cuvs-cu12==25.2.1
108
+ nvidia-curand-cu12==10.3.9.90
109
+ libraft-cu12==25.2.0
110
+ ucxx-cu12==0.42.0
111
+ sortedcontainers==2.4.0
112
+ pyarrow==19.0.1
113
+ distributed-ucxx-cu12==0.42.0
114
+ learntools==0.3.5
115
+ protobuf==3.20.3
116
+ google-cloud-storage==2.14.0
117
+ google-cloud-vision==3.10.1
118
+ google-cloud-automl==1.0.1
119
+ grpcio-status==1.48.2
120
+ google-cloud-translate==3.12.1
121
+ keras==3.5.0
122
+ google-cloud-videointelligence==2.16.1
123
+ google-api-core==1.34.1
124
+ pycparser==2.22
125
+ annotated-types==0.7.0
126
+ dnspython==2.7.0
127
+ in-toto-attestation==0.9.3
128
+ typing-inspection==0.4.0
129
+ cryptography==44.0.2
130
+ id==1.5.0
131
+ PyJWT==2.10.1
132
+ pyOpenSSL==25.0.0
133
+ email_validator==2.2.0
134
+ huggingface-hub==0.30.2
135
+ cffi==1.17.1
136
+ platformdirs==4.3.7
137
+ multiprocess==0.70.16
138
+ multidict==6.2.0
139
+ tuf==5.1.0
140
+ aiohappyeyeballs==2.6.1
141
+ pyasn1==0.6.1
142
+ rfc3161-client==0.1.2
143
+ filelock==3.18.0
144
+ model-signing==1.0.0
145
+ pydantic==2.11.3
146
+ hpack==4.1.0
147
+ xxhash==3.5.0
148
+ propcache==0.3.1
149
+ sigstore-protobuf-specs==0.3.2
150
+ grpclib==0.4.8rc2
151
+ kagglehub==0.3.11
152
+ aiosignal==1.3.2
153
+ sigstore==3.6.1
154
+ aiohttp==3.11.16
155
+ betterproto==2.0.0b6
156
+ datasets==3.5.0
157
+ pydantic_core==2.33.1
158
+ securesystemslib==1.2.0
159
+ hyperframe==6.1.0
160
+ rfc8785==0.1.4
161
+ sigstore-rekor-types==0.0.18
162
+ h2==4.2.0
163
+ dill==0.3.8
164
+ frozenlist==1.5.0
165
+ yarl==1.19.0
166
+ ppft==1.7.6.9
167
+ fiona==1.10.1
168
+ urwid_readline==0.15.1
169
+ coverage==7.8.0
170
+ Wand==0.6.13
171
+ qgrid==1.3.1
172
+ jupyter_client==8.6.3
173
+ woodwork==0.31.0
174
+ overrides==7.7.0
175
+ y-py==0.6.2
176
+ ipywidgets==8.1.5
177
+ ydata-profiling==4.16.1
178
+ hep_ml==0.7.3
179
+ scikit-multilearn==0.2.0
180
+ cytoolz==1.0.1
181
+ pytesseract==0.3.13
182
+ click-plugins==1.1.1
183
+ onnx==1.17.0
184
+ odfpy==1.4.1
185
+ mpld3==0.5.10
186
+ Boruta==0.4.3
187
+ fqdn==1.5.1
188
+ torchinfo==1.8.0
189
+ clint==0.5.1
190
+ pybind11==2.13.6
191
+ kaggle==1.7.4.2
192
+ torchao==0.10.0
193
+ transformers==4.51.1
194
+ python-lsp-server==1.12.2
195
+ jupyter_server_terminals==0.5.3
196
+ pox==0.3.5
197
+ keras-core==0.1.7
198
+ pygltflib==1.16.3
199
+ botocore==1.37.29
200
+ pandas-profiling==3.6.6
201
+ asttokens==3.0.0
202
+ scikit-surprise==1.1.4
203
+ vtk==9.3.1
204
+ jupyter-ydoc==0.2.5
205
+ aiofiles==22.1.0
206
+ deap==1.4.2
207
+ isoduration==20.11.0
208
+ featuretools==1.31.0
209
+ plotly-express==0.4.1
210
+ pycryptodomex==3.22.0
211
+ docstring-to-markdown==0.16
212
+ types-python-dateutil==2.9.0.20241206
213
+ easyocr==1.7.2
214
+ kornia==0.8.0
215
+ slicer==0.0.7
216
+ ImageHash==4.3.1
217
+ pyemd==1.0.0
218
+ fuzzywuzzy==0.18.0
219
+ xgboost==2.0.3
220
+ pandasql==0.7.3
221
+ update-checker==0.18.0
222
+ catboost==1.2.7
223
+ pathos==0.3.2
224
+ widgetsnbextension==4.0.13
225
+ jupyter_server_fileid==0.9.3
226
+ urwid==2.6.16
227
+ fasttext==0.9.3
228
+ orderly-set==5.3.1
229
+ stopit==1.1.2
230
+ haversine==2.9.0
231
+ colorlog==6.9.0
232
+ jupyter_server==2.12.5
233
+ pytorch-lightning==2.5.1
234
+ geojson==3.2.0
235
+ uri-template==1.3.0
236
+ notebook==6.5.4
237
+ pytorch-ignite==0.5.2
238
+ fury==0.12.0
239
+ igraph==0.11.8
240
+ puremagic==1.28
241
+ olefile==0.47
242
+ semver==3.0.4
243
+ gymnasium==0.29.0
244
+ nvidia-cuda-cupti-cu12==12.4.127
245
+ TPOT==0.12.1
246
+ tensorflow-cloud==0.1.5
247
+ torchdata==0.11.0
248
+ shap==0.44.1
249
+ rtree==1.4.0
250
+ ghapi==1.0.6
251
+ ninja==1.11.1.4
252
+ torchmetrics==1.7.1
253
+ Cartopy==0.24.1
254
+ nbdev==2.3.36
255
+ jupyter-lsp==1.5.1
256
+ pycryptodome==3.22.0
257
+ gpxpy==1.6.2
258
+ mlcrate==0.2.0
259
+ papermill==2.6.0
260
+ jupyterlab==3.6.8
261
+ args==0.1.0
262
+ typing-inspect==0.9.0
263
+ omegaconf==2.3.0
264
+ PyUpSet==0.1.1.post7
265
+ dacite==1.9.2
266
+ qtconsole==5.6.1
267
+ visions==0.8.1
268
+ trx-python==0.3
269
+ Chessnut==0.4.1
270
+ beartype==0.20.2
271
+ lml==0.2.0
272
+ jmespath==1.0.1
273
+ jupyterlab_server==2.27.3
274
+ ypy-websocket==0.8.4
275
+ ansicolors==1.1.8
276
+ tensorflow_decision_forests==1.11.0
277
+ path.py==12.5.0
278
+ ray==2.44.1
279
+ blobfile==3.0.0
280
+ tensorflow-io==0.37.1
281
+ pymc3==3.11.4
282
+ wavio==0.0.9
283
+ cligj==0.7.2
284
+ pdf2image==1.17.0
285
+ dipy==1.11.0
286
+ pyaml==25.1.0
287
+ pypdf==5.4.0
288
+ line_profiler==4.2.0
289
+ pydub==0.25.1
290
+ pyLDAvis==3.4.1
291
+ antlr4-python3-runtime==4.9.3
292
+ boto3==1.37.29
293
+ Janome==0.5.0
294
+ langid==1.1.6
295
+ pyclipper==1.3.0.post6
296
+ scikit-plot==0.3.7
297
+ pymongo==4.12.0
298
+ pydegensac==0.1.2
299
+ jupyter_server_ydoc==0.8.0
300
+ phik==0.12.4
301
+ keras-tuner==1.4.7
302
+ colorama==0.4.6
303
+ daal==2025.4.0
304
+ json5==0.12.0
305
+ PyArabic==0.6.15
306
+ cesium==0.12.1
307
+ kornia_rs==0.1.8
308
+ optuna==4.2.1
309
+ ydf==0.9.0
310
+ ujson==5.10.0
311
+ alembic==1.15.2
312
+ annoy==1.17.3
313
+ h2o==3.46.0.7
314
+ Pympler==1.1
315
+ s3fs==0.4.2
316
+ geopandas==0.14.4
317
+ nbconvert==6.4.5
318
+ emoji==2.14.1
319
+ SimpleITK==2.4.1
320
+ watchdog==6.0.0
321
+ funcy==2.0
322
+ deepdiff==8.4.2
323
+ testpath==0.6.0
324
+ nilearn==0.11.1
325
+ openslide-python==1.4.1
326
+ rfc3986-validator==0.1.1
327
+ s3transfer==0.11.4
328
+ nvidia-cuda-runtime-cu12==12.4.127
329
+ nbclient==0.5.13
330
+ Theano==1.0.5
331
+ wurlitzer==3.1.1
332
+ python-bidi==0.6.6
333
+ plum-dispatch==2.5.7
334
+ squarify==0.4.4
335
+ comm==0.2.2
336
+ dataclasses-json==0.6.7
337
+ jupyter-events==0.12.0
338
+ pettingzoo==1.24.0
339
+ lightning-utilities==0.14.3
340
+ matplotlib==3.7.5
341
+ segment_anything==1.0
342
+ datashader==0.17.0
343
+ kaggle-environments==1.16.11
344
+ marshmallow==3.26.1
345
+ setuptools-scm==8.2.0
346
+ rgf-python==3.12.0
347
+ xvfbwrapper==0.2.10
348
+ ipympl==0.9.7
349
+ tiktoken==0.9.0
350
+ stable-baselines3==2.1.0
351
+ nvidia-cuda-nvrtc-cu12==12.4.127
352
+ jedi==0.19.2
353
+ jupyterlab-lsp==3.10.2
354
+ python-lsp-jsonrpc==1.1.2
355
+ aiosqlite==0.21.0
356
+ QtPy==2.4.3
357
+ pydicom==3.0.1
358
+ multimethod==1.12
359
+ docker==7.1.0
360
+ mypy-extensions==1.0.0
361
+ arrow==1.3.0
362
+ isoweek==1.3.3
363
+ texttable==1.7.0
364
+ sphinx-rtd-theme==0.2.4
365
+ kt-legacy==1.0.5
366
+ pyct==0.5.0
367
+ seaborn==0.12.2
368
+ pyexcel-io==0.6.7
369
+ Shimmy==1.3.0
370
+ rfc3339-validator==0.1.4
371
+ mamba==0.11.3
372
+ path==17.1.0
373
+ Mako==1.3.9
374
+ pyexcel-ods==0.6.0
375
+ preprocessing==0.1.13
376
+ lime==0.2.0.1
377
+ htmlmin==0.1.12
378
+ python-json-logger==3.3.0
379
+ Theano-PyMC==1.1.2
380
+ bayesian-optimization==2.0.3
381
+ keras-cv==0.9.0
382
+ pudb==2024.1.3
383
+ gatspy==0.3
384
+ hf_transfer==0.1.9
385
+ scikit-learn-intelex==2025.4.0
386
+ scikit-optimize==0.10.2
387
+ mne==1.9.0
388
+ openslide-bin==4.0.0.6
389
+ mistune==0.8.4
390
+ execnb==0.1.14
391
+ google-colab==1.0.0
392
+ astunparse==1.6.3
393
+ ipython==7.34.0
394
+ referencing==0.36.2
395
+ types-pytz==2025.1.0.20250204
396
+ librosa==0.10.2.post1
397
+ soxr==0.5.0.post1
398
+ libclang==18.1.1
399
+ keras-nlp==0.18.1
400
+ imageio==2.37.0
401
+ gensim==4.3.3
402
+ clarabel==0.10.0
403
+ h11==0.14.0
404
+ imagesize==1.4.1
405
+ py-cpuinfo==9.0.0
406
+ geemap==0.35.1
407
+ debugpy==1.8.0
408
+ jupyterlab_pygments==0.3.0
409
+ backcall==0.2.0
410
+ tensorflow-hub==0.16.1
411
+ requests-oauthlib==2.0.0
412
+ scooby==0.10.0
413
+ opencv-python-headless==4.11.0.86
414
+ en-core-web-sm==3.7.1
415
+ dopamine_rl==4.1.2
416
+ langchain-text-splitters==0.3.6
417
+ sklearn-compat==0.1.3
418
+ ipython-genutils==0.2.0
419
+ pyparsing==3.2.1
420
+ catalogue==2.0.10
421
+ sphinxcontrib-devhelp==2.0.0
422
+ sklearn-pandas==2.2.0
423
+ Markdown==3.7
424
+ sphinxcontrib-qthelp==2.0.0
425
+ openai==1.61.1
426
+ google-auth-httplib2==0.2.0
427
+ Flask==3.1.0
428
+ preshed==3.0.9
429
+ albumentations==2.0.4
430
+ marisa-trie==1.2.1
431
+ ipyleaflet==0.19.2
432
+ chardet==5.2.0
433
+ jupyter_core==5.7.2
434
+ simple-parsing==0.1.7
435
+ gin-config==0.5.0
436
+ osqp==0.6.7.post3
437
+ ipython-sql==0.5.0
438
+ toml==0.10.2
439
+ stringzilla==3.11.3
440
+ polars==1.9.0
441
+ jsonpointer==3.0.0
442
+ opentelemetry-semantic-conventions==0.37b0
443
+ ndindex==1.9.2
444
+ h5py==3.12.1
445
+ tensorflow-io-gcs-filesystem==0.37.1
446
+ datascience==0.17.6
447
+ alabaster==1.0.0
448
+ pygit2==1.17.0
449
+ pyshp==2.3.1
450
+ folium==0.19.4
451
+ spacy-legacy==3.0.12
452
+ diffusers==0.32.2
453
+ etils==1.12.0
454
+ fastcore==1.7.29
455
+ requests-toolbelt==1.0.0
456
+ triton==3.1.0
457
+ thinc==8.2.5
458
+ holidays==0.66
459
+ PyDrive==1.3.1
460
+ pytest==8.3.4
461
+ imutils==0.5.4
462
+ sentence-transformers==3.4.1
463
+ opt_einsum==3.4.0
464
+ moviepy==1.0.3
465
+ nbclassic==1.2.0
466
+ importlib_resources==6.5.2
467
+ xarray-einstats==0.8.0
468
+ lazy_loader==0.4
469
+ ipyevents==2.0.2
470
+ immutabledict==4.2.1
471
+ google-cloud-functions==1.19.0
472
+ music21==9.3.0
473
+ tensorflow-metadata==1.16.1
474
+ nx-cugraph-cu12==24.12.0
475
+ linkify-it-py==2.0.3
476
+ accelerate==1.3.0
477
+ language_data==1.3.0
478
+ nvidia-cuda-nvcc-cu12==12.5.82
479
+ google-cloud-iam==2.18.0
480
+ torchsummary==1.5.1
481
+ webencodings==0.5.1
482
+ webcolors==24.11.1
483
+ pydot==3.0.4
484
+ jellyfish==1.1.0
485
+ gym==0.25.2
486
+ tcmlib==1.2.0
487
+ cramjam==2.9.1
488
+ opentelemetry-api==1.16.0
489
+ gdown==5.2.0
490
+ pymystem3==0.2.0
491
+ parso==0.8.4
492
+ py4j==0.10.9.7
493
+ entrypoints==0.4
494
+ proto-plus==1.26.0
495
+ fastprogress==1.0.3
496
+ pyogrio==0.10.0
497
+ oauthlib==3.2.2
498
+ fastjsonschema==2.21.1
499
+ psycopg2==2.9.10
500
+ missingno==0.5.2
501
+ pandas-datareader==0.10.0
502
+ google-spark-connect==0.5.2
503
+ Deprecated==1.2.18
504
+ pooch==1.8.2
505
+ cycler==0.12.1
506
+ cvxpy==1.6.0
507
+ tensorboard==2.18.0
508
+ typeguard==4.4.1
509
+ googleapis-common-protos==1.67.0
510
+ inflect==7.5.0
511
+ argon2-cffi-bindings==21.2.0
512
+ namex==0.0.8
513
+ rpds-py==0.22.3
514
+ scikit-image==0.25.1
515
+ nvidia-nccl-cu12==2.21.5
516
+ arviz==0.20.0
517
+ opentelemetry-sdk==1.16.0
518
+ google-cloud-resource-manager==1.14.0
519
+ pandas-gbq==0.26.1
520
+ argon2-cffi==23.1.0
521
+ opencv-contrib-python==4.11.0.86
522
+ sphinxcontrib-applehelp==2.0.0
523
+ jax-cuda12-pjrt==0.4.33
524
+ bleach==6.2.0
525
+ h5netcdf==1.5.0
526
+ defusedxml==0.7.1
527
+ sphinxcontrib-serializinghtml==2.0.0
528
+ google-cloud-dataproc==5.17.0
529
+ more-itertools==10.6.0
530
+ panel==1.6.0
531
+ python-utils==3.9.1
532
+ fonttools==4.56.0
533
+ websockets==14.2
534
+ Pyomo==6.8.2
535
+ pydotplus==2.0.2
536
+ ml-dtypes==0.4.1
537
+ peewee==3.17.9
538
+ google-pasta==0.2.0
539
+ pyzmq==24.0.1
540
+ cmdstanpy==1.2.5
541
+ ipyparallel==8.8.0
542
+ parsy==2.1
543
+ bqplot==0.12.44
544
+ spacy-loggers==1.0.5
545
+ google-ai-generativelanguage==0.6.15
546
+ prophet==1.1.6
547
+ pydata-google-auth==1.9.1
548
+ absl-py==1.4.0
549
+ openpyxl==3.1.5
550
+ vega-datasets==0.9.0
551
+ mpmath==1.3.0
552
+ frozendict==2.4.6
553
+ gcsfs==2024.10.0
554
+ google-cloud-bigquery==3.25.0
555
+ opencv-python==4.11.0.86
556
+ mdit-py-plugins==0.4.2
557
+ zstandard==0.23.0
558
+ torch==2.5.1+cu124
559
+ langcodes==3.5.0
560
+ blinker==1.9.0
561
+ xyzservices==2025.1.0
562
+ googledrivedownloader==1.1.0
563
+ termcolor==2.5.0
564
+ google-generativeai==0.8.4
565
+ et_xmlfile==2.0.0
566
+ jieba==0.42.1
567
+ pluggy==1.5.0
568
+ grpcio==1.70.0
569
+ hyperopt==0.2.7
570
+ python-louvain==0.16
571
+ bigframes==1.36.0
572
+ orbax-checkpoint==0.6.4
573
+ google-auth-oauthlib==1.2.1
574
+ soupsieve==2.6
575
+ PyDrive2==1.21.3
576
+ simsimd==6.2.1
577
+ pyproj==3.7.0
578
+ peft==0.14.0
579
+ imbalanced-learn==0.13.0
580
+ wandb==0.19.6
581
+ wcwidth==0.2.13
582
+ google-cloud-language==2.16.0
583
+ google-cloud-core==2.4.1
584
+ progressbar2==4.5.0
585
+ pexpect==4.9.0
586
+ ptyprocess==0.7.0
587
+ pygame==2.6.1
588
+ docker-pycreds==0.4.0
589
+ Cython==3.0.12
590
+ shellingham==1.5.4
591
+ setproctitle==1.3.4
592
+ CacheControl==0.14.2
593
+ prometheus_client==0.21.1
594
+ nbformat==5.10.4
595
+ python-snappy==0.7.3
596
+ narwhals==1.26.0
597
+ google-cloud-firestore==2.20.0
598
+ nest-asyncio==1.6.0
599
+ chex==0.1.88
600
+ sentry-sdk==2.21.0
601
+ nibabel==5.3.2
602
+ iniconfig==2.0.0
603
+ multipledispatch==1.0.0
604
+ astropy-iers-data==0.2025.2.10.0.33.26
605
+ tf_keras==2.18.0
606
+ networkx==3.4.2
607
+ safetensors==0.5.2
608
+ sentencepiece==0.2.0
609
+ einops==0.8.1
610
+ plotly==5.24.1
611
+ rpy2==3.4.2
612
+ bokeh==3.6.3
613
+ ipytree==0.2.2
614
+ python-box==7.3.2
615
+ scs==3.2.7.post2
616
+ pillow==11.1.0
617
+ textblob==0.19.0
618
+ PyOpenGL==3.1.9
619
+ google-cloud-spanner==3.51.0
620
+ decorator==4.4.2
621
+ google-cloud-datastore==2.20.2
622
+ docstring_parser==0.16
623
+ pickleshare==0.7.5
624
+ wrapt==1.17.2
625
+ pyspark==3.5.4
626
+ pytensor==2.27.1
627
+ GDAL==3.6.4
628
+ ibis-framework==9.2.0
629
+ holoviews==1.20.0
630
+ wasabi==1.1.3
631
+ anyio==3.7.1
632
+ tensorflow-text==2.18.1
633
+ optax==0.2.4
634
+ gast==0.6.0
635
+ Werkzeug==3.1.3
636
+ colorcet==3.1.0
637
+ tensorstore==0.1.71
638
+ atpublic==4.1.0
639
+ blis==0.7.11
640
+ humanize==4.11.0
641
+ prettytable==3.14.0
642
+ spanner-graph-notebook==1.1.1
643
+ python-slugify==8.0.4
644
+ earthengine-api==1.5.2
645
+ miniKanren==1.0.3
646
+ traitlets==5.7.1
647
+ yfinance==0.2.52
648
+ sqlparse==0.5.3
649
+ terminado==0.18.1
650
+ tensorflow-datasets==4.9.7
651
+ pymc==5.20.1
652
+ sphinxcontrib-htmlhelp==2.1.0
653
+ grpc-interceptor==0.15.4
654
+ geocoder==1.38.1
655
+ langchain==0.3.18
656
+ pyviz_comms==3.0.4
657
+ babel==2.17.0
658
+ proglog==0.1.10
659
+ ply==3.11
660
+ audioread==3.0.1
661
+ google-genai==0.8.0
662
+ docutils==0.21.2
663
+ distro==1.9.0
664
+ tf-slim==1.1.0
665
+ orjson==3.10.15
666
+ google-auth==2.27.0
667
+ cons==0.4.6
668
+ gspread==6.1.4
669
+ geographiclib==2.0
670
+ matplotlib-inline==0.1.7
671
+ editdistance==0.8.1
672
+ httpcore==1.0.7
673
+ spacy==3.7.5
674
+ community==1.0.0b1
675
+ tifffile==2025.1.10
676
+ gym-notices==0.0.8
677
+ notebook_shim==0.2.4
678
+ soundfile==0.13.1
679
+ itsdangerous==2.2.0
680
+ jsonpatch==1.33
681
+ plotnine==0.14.5
682
+ prompt_toolkit==3.0.50
683
+ traittypes==0.2.1
684
+ autograd==1.7.0
685
+ text-unidecode==1.3
686
+ pycocotools==2.0.8
687
+ weasel==0.4.1
688
+ srsly==2.5.1
689
+ wordcloud==1.9.4
690
+ langsmith==0.3.8
691
+ cymem==2.0.11
692
+ smart-open==7.1.0
693
+ dlib==19.24.2
694
+ grpc-google-iam-v1==0.14.0
695
+ tenacity==9.0.0
696
+ beautifulsoup4==4.13.3
697
+ umf==0.9.1
698
+ tables==3.10.2
699
+ cloudpathlib==0.20.0
700
+ torchvision==0.20.1+cu124
701
+ altair==5.5.0
702
+ cufflinks==0.17.3
703
+ cvxopt==1.3.2
704
+ tokenizers==0.21.0
705
+ PySocks==1.7.1
706
+ uc-micro-py==1.0.3
707
+ xlrd==2.0.1
708
+ numexpr==2.10.2
709
+ promise==2.3
710
+ Send2Trash==1.8.3
711
+ array_record==0.6.0
712
+ treescope==0.1.8
713
+ sniffio==1.3.1
714
+ httplib2==0.22.0
715
+ jupyterlab_widgets==3.0.13
716
+ httpimport==1.4.0
717
+ confection==0.1.5
718
+ uritemplate==4.1.1
719
+ stanio==0.5.1
720
+ easydict==1.13
721
+ Sphinx==8.1.3
722
+ future==1.0.0
723
+ tensorflow==2.18.0
724
+ websocket-client==1.8.0
725
+ flatbuffers==25.2.10
726
+ firebase-admin==6.6.0
727
+ imgaug==0.4.0
728
+ cmake==3.31.4
729
+ Bottleneck==1.4.2
730
+ kiwisolver==1.4.8
731
+ ale-py==0.10.1
732
+ fastai==2.7.18
733
+ snowballstemmer==2.2.0
734
+ typer==0.15.1
735
+ colour==0.1.5
736
+ google-crc32c==1.6.0
737
+ sphinxcontrib-jsmath==1.0.1
738
+ google-api-python-client==2.160.0
739
+ google-resumable-media==2.7.2
740
+ murmurhash==1.0.12
741
+ portpicker==1.5.2
742
+ Farama-Notifications==0.0.4
743
+ sympy==1.13.1
744
+ optree==0.14.0
745
+ timm==1.0.14
746
+ ipykernel==6.17.1
747
+ pathlib==1.0.1
748
+ jaxlib==0.4.33
749
+ pandas-stubs==2.2.2.240909
750
+ ratelim==0.1.6
751
+ greenlet==3.1.1
752
+ SQLAlchemy==2.0.38
753
+ multitasking==0.0.11
754
+ astropy==7.0.1
755
+ imageio-ffmpeg==0.6.0
756
+ bigquery-magics==0.5.0
757
+ pyperclip==1.9.0
758
+ jsonschema-specifications==2024.10.1
759
+ jax-cuda12-plugin==0.4.33
760
+ torchaudio==2.5.1+cu124
761
+ eerepr==0.1.0
762
+ tinycss2==1.4.0
763
+ jiter==0.8.2
764
+ jsonpickle==4.0.1
765
+ google-cloud-aiplatform==1.79.0
766
+ cyipopt==1.5.0
767
+ oauth2client==4.1.3
768
+ jax==0.4.33
769
+ lxml==5.3.1
770
+ sqlglot==25.6.1
771
+ flax==0.10.3
772
+ etuples==0.3.9
773
+ gspread-dataframe==4.0.0
774
+ geopy==2.4.1
775
+ logical-unification==0.4.6
776
+ natsort==8.4.0
777
+ GitPython==3.1.44
778
+ pyerfa==2.0.1.5
779
+ param==2.2.0
780
+ qdldl==0.1.7.post5
781
+ keras-hub==0.18.1
782
+ pylibcugraph-cu12==24.12.0
783
+ xarray==2025.1.2
784
+ db-dtypes==1.4.1
785
+ gitdb==4.0.12
786
+ mizani==0.13.1
787
+ google-cloud-bigtable==2.28.1
788
+ httpx==0.28.1
789
+ google-cloud-bigquery-connection==1.17.0
790
+ jsonschema==4.23.0
791
+ nvidia-nvtx-cu12==12.4.127
792
+ albucore==0.0.23
793
+ tweepy==4.15.0
794
+ fastdownload==0.0.7
795
+ highspy==1.9.0
796
+ matplotlib-venn==1.1.1
797
+ jupyter-console==6.1.0
798
+ duckdb==1.1.3
799
+ branca==0.8.1
800
+ pyasn1_modules==0.4.1
801
+ pandocfilters==1.5.1
802
+ yellowbrick==1.5
803
+ contourpy==1.3.1
804
+ tzlocal==5.3
805
+ tensorboard-data-server==0.7.2
806
+ google==2.0.3
807
+ jupyter-leaflet==0.19.2
808
+ mlxtend==0.23.4
809
+ blosc2==3.1.0
810
+ smmap==5.0.2
811
+ langchain-core==0.3.35
812
+ wheel==0.45.1
813
+ glob2==0.7
814
+ tensorflow-probability==0.25.0
815
+ colorlover==0.3.0
816
+ google-cloud-pubsub==2.25.0
817
+ ipyfilechooser==0.6.0
818
+ rsa==4.9
819
+ dm-tree==0.1.9
820
+ html5lib==1.1
821
+ python-apt==0.0.0
822
+ setuptools==75.1.0
823
+ types-setuptools==75.8.0.20250210
824
+ requirements-parser==0.9.0
825
+ pip==24.1.2
826
+ PyGObject==3.42.1
827
+ blinker==1.4
828
+ jeepney==0.7.1
829
+ six==1.16.0
830
+ oauthlib==3.2.0
831
+ wadllib==1.3.6
832
+ launchpadlib==1.10.16
833
+ dbus-python==1.2.18
834
+ PyJWT==2.3.0
835
+ importlib-metadata==4.6.4
836
+ httplib2==0.20.2
837
+ zipp==1.0.0
838
+ pyparsing==2.4.7
839
+ python-apt==2.4.0+ubuntu4
840
+ lazr.restfulclient==0.14.4
841
+ SecretStorage==3.3.1
842
+ distro==1.7.0
843
+ lazr.uri==1.0.6
844
+ more-itertools==8.10.0
845
+ cryptography==3.4.8
846
+ keyring==23.5.0
847
+ packaging==24.1
848
+ inflect==7.3.1
849
+ autocommand==2.2.2
850
+ typeguard==4.3.0
851
+ jaraco.text==3.12.1
852
+ importlib_resources==6.4.0
853
+ wheel==0.43.0
854
+ zipp==3.19.2
855
+ platformdirs==4.2.2
856
+ importlib_metadata==8.0.0
857
+ tomli==2.0.1
858
+ jaraco.collections==5.1.0
859
+ more-itertools==10.3.0
860
+ typing_extensions==4.12.2
861
+ backports.tarfile==1.2.0
862
+ jaraco.functools==4.0.1
863
+ jaraco.context==5.3.0
wandb/run-20250504_135537-fqirr53c/files/wandb-metadata.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.6.56+-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.11.11",
4
+ "startedAt": "2025-05-04T13:55:38.193987Z",
5
+ "program": "kaggle.ipynb",
6
+ "email": "[email protected]",
7
+ "root": "/kaggle/working",
8
+ "host": "40834be6d3ef",
9
+ "executable": "/usr/bin/python3",
10
+ "cpu_count": 2,
11
+ "cpu_count_logical": 4,
12
+ "gpu": "Tesla T4",
13
+ "gpu_count": 2,
14
+ "disk": {
15
+ "/": {
16
+ "total": "8656922775552",
17
+ "used": "6740762419200"
18
+ }
19
+ },
20
+ "memory": {
21
+ "total": "33662345216"
22
+ },
23
+ "cpu": {
24
+ "count": 2,
25
+ "countLogical": 4
26
+ },
27
+ "gpu_nvidia": [
28
+ {
29
+ "name": "Tesla T4",
30
+ "memoryTotal": "16106127360",
31
+ "cudaCores": 2560,
32
+ "architecture": "Turing"
33
+ },
34
+ {
35
+ "name": "Tesla T4",
36
+ "memoryTotal": "16106127360",
37
+ "cudaCores": 2560,
38
+ "architecture": "Turing"
39
+ }
40
+ ],
41
+ "cudaVersion": "12.6"
42
+ }
wandb/run-20250504_135537-fqirr53c/logs/debug-core.log ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"time":"2025-05-04T13:55:37.999673444Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpwvy2vmzr/port-31.txt","pid":31,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false}
2
+ {"time":"2025-05-04T13:55:38.016060325Z","level":"INFO","msg":"Will exit if parent process dies.","ppid":31}
3
+ {"time":"2025-05-04T13:55:38.01603677Z","level":"INFO","msg":"server is running","addr":{"IP":"127.0.0.1","Port":36069,"Zone":""}}
4
+ {"time":"2025-05-04T13:55:38.183306705Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"127.0.0.1:45302"}
5
+ {"time":"2025-05-04T13:55:38.202585815Z","level":"INFO","msg":"handleInformInit: received","streamId":"fqirr53c","id":"127.0.0.1:45302"}
6
+ {"time":"2025-05-04T13:55:38.418929081Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"fqirr53c","id":"127.0.0.1:45302"}
wandb/run-20250504_135537-fqirr53c/logs/debug-internal.log ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-05-04T13:55:38.202745569Z","level":"INFO","msg":"stream: starting","core version":"0.19.6","symlink path":"/kaggle/working/wandb/run-20250504_135537-fqirr53c/logs/debug-core.log"}
2
+ {"time":"2025-05-04T13:55:38.418825152Z","level":"INFO","msg":"created new stream","id":"fqirr53c"}
3
+ {"time":"2025-05-04T13:55:38.418912876Z","level":"INFO","msg":"stream: started","id":"fqirr53c"}
4
+ {"time":"2025-05-04T13:55:38.418912275Z","level":"INFO","msg":"handler: started","stream_id":"fqirr53c"}
5
+ {"time":"2025-05-04T13:55:38.418969742Z","level":"INFO","msg":"writer: Do: started","stream_id":"fqirr53c"}
6
+ {"time":"2025-05-04T13:55:38.420841754Z","level":"INFO","msg":"sender: started","stream_id":"fqirr53c"}
7
+ {"time":"2025-05-04T13:55:43.672416585Z","level":"INFO","msg":"Starting system monitor"}
8
+ {"time":"2025-05-04T13:55:44.301310251Z","level":"INFO","msg":"Pausing system monitor"}
9
+ {"time":"2025-05-04T13:55:44.30364919Z","level":"INFO","msg":"Resuming system monitor"}
10
+ {"time":"2025-05-04T13:55:48.938200586Z","level":"INFO","msg":"Pausing system monitor"}
11
+ {"time":"2025-05-04T13:55:48.938274693Z","level":"INFO","msg":"Resuming system monitor"}
12
+ {"time":"2025-05-04T13:55:48.949549147Z","level":"INFO","msg":"Pausing system monitor"}
13
+ {"time":"2025-05-04T13:55:48.955885077Z","level":"INFO","msg":"Resuming system monitor"}
14
+ {"time":"2025-05-04T13:55:49.478116866Z","level":"INFO","msg":"Pausing system monitor"}
15
+ {"time":"2025-05-04T13:55:49.482861133Z","level":"INFO","msg":"Resuming system monitor"}
16
+ {"time":"2025-05-04T13:55:50.200717672Z","level":"INFO","msg":"Pausing system monitor"}
17
+ {"time":"2025-05-04T13:56:37.950323685Z","level":"INFO","msg":"Resuming system monitor"}
18
+ {"time":"2025-05-04T13:56:37.99134844Z","level":"INFO","msg":"Pausing system monitor"}
19
+ {"time":"2025-05-04T13:56:45.174698103Z","level":"INFO","msg":"Resuming system monitor"}
20
+ {"time":"2025-05-04T13:56:45.215072733Z","level":"INFO","msg":"Pausing system monitor"}
21
+ {"time":"2025-05-04T13:56:48.022788722Z","level":"INFO","msg":"Resuming system monitor"}
22
+ {"time":"2025-05-04T13:56:48.027209579Z","level":"INFO","msg":"Pausing system monitor"}
23
+ {"time":"2025-05-04T13:56:52.774645063Z","level":"INFO","msg":"Resuming system monitor"}
24
+ {"time":"2025-05-04T13:56:52.778802909Z","level":"INFO","msg":"Pausing system monitor"}
25
+ {"time":"2025-05-04T13:56:56.16097781Z","level":"INFO","msg":"Resuming system monitor"}
26
+ {"time":"2025-05-04T13:56:56.201124178Z","level":"INFO","msg":"Pausing system monitor"}
27
+ {"time":"2025-05-04T13:57:14.976202089Z","level":"INFO","msg":"Resuming system monitor"}
28
+ {"time":"2025-05-04T13:57:15.017169337Z","level":"INFO","msg":"Pausing system monitor"}
29
+ {"time":"2025-05-04T13:57:19.728978475Z","level":"INFO","msg":"Resuming system monitor"}
30
+ {"time":"2025-05-04T13:57:19.731385896Z","level":"INFO","msg":"Pausing system monitor"}
31
+ {"time":"2025-05-04T13:57:29.106028217Z","level":"INFO","msg":"Resuming system monitor"}
32
+ {"time":"2025-05-04T13:57:29.199488171Z","level":"INFO","msg":"Pausing system monitor"}
33
+ {"time":"2025-05-04T13:57:45.426330298Z","level":"INFO","msg":"Resuming system monitor"}
34
+ {"time":"2025-05-04T13:57:45.600032002Z","level":"INFO","msg":"Pausing system monitor"}
35
+ {"time":"2025-05-04T13:57:49.34560494Z","level":"INFO","msg":"Resuming system monitor"}
36
+ {"time":"2025-05-04T13:57:49.369300379Z","level":"INFO","msg":"Pausing system monitor"}
37
+ {"time":"2025-05-04T13:57:52.874240065Z","level":"INFO","msg":"Resuming system monitor"}
38
+ {"time":"2025-05-04T13:57:53.031347665Z","level":"INFO","msg":"Pausing system monitor"}
39
+ {"time":"2025-05-04T13:57:54.288349856Z","level":"INFO","msg":"Resuming system monitor"}
40
+ {"time":"2025-05-04T13:57:54.329085476Z","level":"INFO","msg":"Pausing system monitor"}
41
+ {"time":"2025-05-04T13:57:57.558832771Z","level":"INFO","msg":"Resuming system monitor"}
42
+ {"time":"2025-05-04T13:57:57.562122272Z","level":"INFO","msg":"Pausing system monitor"}
43
+ {"time":"2025-05-04T13:58:08.678269262Z","level":"INFO","msg":"Resuming system monitor"}
44
+ {"time":"2025-05-04T13:58:08.719108347Z","level":"INFO","msg":"Pausing system monitor"}
45
+ {"time":"2025-05-04T13:58:08.953078089Z","level":"INFO","msg":"Resuming system monitor"}
46
+ {"time":"2025-05-04T13:58:09.202636924Z","level":"INFO","msg":"Pausing system monitor"}
47
+ {"time":"2025-05-04T13:58:09.234787853Z","level":"INFO","msg":"Resuming system monitor"}
48
+ {"time":"2025-05-04T13:58:09.248484505Z","level":"INFO","msg":"Pausing system monitor"}
49
+ {"time":"2025-05-04T13:58:09.455359852Z","level":"INFO","msg":"Resuming system monitor"}
50
+ {"time":"2025-05-04T13:58:10.519452133Z","level":"INFO","msg":"Pausing system monitor"}
51
+ {"time":"2025-05-04T13:58:10.525098579Z","level":"INFO","msg":"Resuming system monitor"}
52
+ {"time":"2025-05-04T13:58:10.556828598Z","level":"INFO","msg":"Pausing system monitor"}
53
+ {"time":"2025-05-04T13:58:12.566933352Z","level":"INFO","msg":"Resuming system monitor"}
54
+ {"time":"2025-05-04T13:58:12.598549609Z","level":"INFO","msg":"Pausing system monitor"}
55
+ {"time":"2025-05-04T13:58:12.696296996Z","level":"INFO","msg":"Resuming system monitor"}
56
+ {"time":"2025-05-04T13:58:12.982256266Z","level":"INFO","msg":"Pausing system monitor"}
57
+ {"time":"2025-05-04T13:58:13.918108951Z","level":"INFO","msg":"Resuming system monitor"}
58
+ {"time":"2025-05-04T13:58:14.432090582Z","level":"INFO","msg":"Pausing system monitor"}
59
+ {"time":"2025-05-04T13:58:23.250994046Z","level":"INFO","msg":"Resuming system monitor"}
60
+ {"time":"2025-05-04T13:58:23.254387514Z","level":"INFO","msg":"Pausing system monitor"}
61
+ {"time":"2025-05-04T13:58:26.529227986Z","level":"INFO","msg":"Resuming system monitor"}
62
+ {"time":"2025-05-04T13:58:26.570194007Z","level":"INFO","msg":"Pausing system monitor"}
63
+ {"time":"2025-05-04T13:58:28.01875759Z","level":"INFO","msg":"Resuming system monitor"}
64
+ {"time":"2025-05-04T13:58:28.022911581Z","level":"INFO","msg":"Pausing system monitor"}
65
+ {"time":"2025-05-04T13:58:29.519479289Z","level":"INFO","msg":"Resuming system monitor"}
66
+ {"time":"2025-05-04T13:58:29.560316017Z","level":"INFO","msg":"Pausing system monitor"}
67
+ {"time":"2025-05-04T13:58:37.473074826Z","level":"INFO","msg":"Resuming system monitor"}
68
+ {"time":"2025-05-04T13:58:37.514171205Z","level":"INFO","msg":"Pausing system monitor"}
69
+ {"time":"2025-05-04T13:58:39.094035633Z","level":"INFO","msg":"Resuming system monitor"}
70
+ {"time":"2025-05-04T13:58:39.096740272Z","level":"INFO","msg":"Pausing system monitor"}
71
+ {"time":"2025-05-04T13:58:40.78434538Z","level":"INFO","msg":"Resuming system monitor"}
72
+ {"time":"2025-05-04T13:58:40.786569163Z","level":"INFO","msg":"Pausing system monitor"}
73
+ {"time":"2025-05-04T13:58:44.495681201Z","level":"INFO","msg":"Resuming system monitor"}
74
+ {"time":"2025-05-04T13:58:44.536237954Z","level":"INFO","msg":"Pausing system monitor"}
75
+ {"time":"2025-05-04T13:59:50.772551272Z","level":"INFO","msg":"Resuming system monitor"}
76
+ {"time":"2025-05-04T13:59:50.775840849Z","level":"INFO","msg":"Pausing system monitor"}
77
+ {"time":"2025-05-04T14:00:22.55047425Z","level":"INFO","msg":"Resuming system monitor"}
78
+ {"time":"2025-05-04T14:00:22.755676331Z","level":"INFO","msg":"Pausing system monitor"}
79
+ {"time":"2025-05-04T14:00:23.635404257Z","level":"INFO","msg":"Resuming system monitor"}
80
+ {"time":"2025-05-04T14:00:23.638459244Z","level":"INFO","msg":"Pausing system monitor"}
81
+ {"time":"2025-05-04T14:00:27.786577245Z","level":"INFO","msg":"Resuming system monitor"}
82
+ {"time":"2025-05-04T14:00:27.82704493Z","level":"INFO","msg":"Pausing system monitor"}
83
+ {"time":"2025-05-04T14:00:28.083612564Z","level":"INFO","msg":"Resuming system monitor"}
84
+ {"time":"2025-05-04T14:00:28.307378133Z","level":"INFO","msg":"Pausing system monitor"}
85
+ {"time":"2025-05-04T14:00:29.410088042Z","level":"INFO","msg":"Resuming system monitor"}
86
+ {"time":"2025-05-04T14:00:29.451245423Z","level":"INFO","msg":"Pausing system monitor"}
87
+ {"time":"2025-05-04T14:00:30.370540904Z","level":"INFO","msg":"Resuming system monitor"}
88
+ {"time":"2025-05-04T14:00:31.453942197Z","level":"INFO","msg":"Pausing system monitor"}
89
+ {"time":"2025-05-04T14:00:31.459475789Z","level":"INFO","msg":"Resuming system monitor"}
90
+ {"time":"2025-05-04T14:00:31.491418417Z","level":"INFO","msg":"Pausing system monitor"}
91
+ {"time":"2025-05-04T14:00:32.913789166Z","level":"INFO","msg":"Resuming system monitor"}
92
+ {"time":"2025-05-04T14:00:33.19791726Z","level":"INFO","msg":"Pausing system monitor"}
93
+ {"time":"2025-05-04T14:00:33.202887077Z","level":"INFO","msg":"Resuming system monitor"}
wandb/run-20250504_135537-fqirr53c/logs/debug.log ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-05-04 13:55:37,339 INFO MainThread:31 [wandb_setup.py:_flush():68] Current SDK version is 0.19.6
2
+ 2025-05-04 13:55:37,339 INFO MainThread:31 [wandb_setup.py:_flush():68] Configure stats pid to 31
3
+ 2025-05-04 13:55:37,339 INFO MainThread:31 [wandb_setup.py:_flush():68] Loading settings from /root/.config/wandb/settings
4
+ 2025-05-04 13:55:37,339 INFO MainThread:31 [wandb_setup.py:_flush():68] Loading settings from /kaggle/working/wandb/settings
5
+ 2025-05-04 13:55:37,339 INFO MainThread:31 [wandb_setup.py:_flush():68] Loading settings from environment variables
6
+ 2025-05-04 13:55:37,339 INFO MainThread:31 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /kaggle/working/wandb/run-20250504_135537-fqirr53c/logs/debug.log
7
+ 2025-05-04 13:55:37,339 INFO MainThread:31 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /kaggle/working/wandb/run-20250504_135537-fqirr53c/logs/debug-internal.log
8
+ 2025-05-04 13:55:37,339 INFO MainThread:31 [wandb_init.py:monkeypatch_ipython():589] configuring jupyter hooks <wandb.sdk.wandb_init._WandbInit object at 0x7f46cb26cd10>
9
+ 2025-05-04 13:55:37,340 INFO MainThread:31 [wandb_init.py:init():756] calling init triggers
10
+ 2025-05-04 13:55:37,340 INFO MainThread:31 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
11
+ config: {'_wandb': {}}
12
+ 2025-05-04 13:55:37,340 INFO MainThread:31 [wandb_init.py:init():789] starting backend
13
+ 2025-05-04 13:55:38,179 INFO MainThread:31 [wandb_init.py:init():793] sending inform_init request
14
+ 2025-05-04 13:55:38,193 INFO MainThread:31 [backend.py:_multiprocessing_setup():97] multiprocessing start_methods=fork,spawn,forkserver, using: spawn
15
+ 2025-05-04 13:55:38,193 INFO MainThread:31 [wandb_init.py:init():808] backend started and connected
16
+ 2025-05-04 13:55:38,201 INFO MainThread:31 [wandb_run.py:_label_probe_notebook():1196] probe notebook
17
+ 2025-05-04 13:55:43,522 INFO MainThread:31 [wandb_init.py:init():901] updated telemetry
18
+ 2025-05-04 13:55:43,523 INFO MainThread:31 [wandb_init.py:init():936] communicating run to backend with 90.0 second timeout
19
+ 2025-05-04 13:55:43,665 INFO MainThread:31 [wandb_init.py:init():994] starting run threads in backend
20
+ 2025-05-04 13:55:44,262 INFO MainThread:31 [wandb_run.py:_console_start():2385] atexit reg
21
+ 2025-05-04 13:55:44,263 INFO MainThread:31 [wandb_run.py:_redirect():2235] redirect: wrap_raw
22
+ 2025-05-04 13:55:44,263 INFO MainThread:31 [wandb_run.py:_redirect():2300] Wrapping output streams.
23
+ 2025-05-04 13:55:44,263 INFO MainThread:31 [wandb_run.py:_redirect():2325] Redirects installed.
24
+ 2025-05-04 13:55:44,273 INFO MainThread:31 [wandb_init.py:init():1036] run started, returning control to user process
25
+ 2025-05-04 13:55:44,273 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
26
+ 2025-05-04 13:55:44,274 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
27
+ 2025-05-04 13:55:44,279 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
28
+ 2025-05-04 13:55:48,899 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
29
+ 2025-05-04 13:55:48,899 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
30
+ 2025-05-04 13:55:48,905 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
31
+ 2025-05-04 13:55:48,948 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
32
+ 2025-05-04 13:55:48,949 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
33
+ 2025-05-04 13:55:48,955 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
34
+ 2025-05-04 13:55:49,477 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
35
+ 2025-05-04 13:55:49,477 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
36
+ 2025-05-04 13:55:49,482 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
37
+ 2025-05-04 13:55:49,977 INFO MainThread:31 [wandb_run.py:_config_callback():1253] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'UBC-NLP/MARBERT', '_attn_implementation_autoset': True, 'transformers_version': '4.51.1', 'directionality': 'bidi', 'gradient_checkpointing': False, 'model_type': 'bert', 'pooler_fc_size': 768, 'pooler_num_attention_heads': 12, 'pooler_num_fc_layers': 3, 'pooler_size_per_head': 128, 'pooler_type': 'first_token_transform', 'vocab_size': 100000, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 
'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.2, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/May04_13-55-48_40834be6d3ef', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 5, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 5, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 
'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'macro_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'tp_size': 0, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False}
38
+ 2025-05-04 13:55:49,986 INFO MainThread:31 [wandb_config.py:__setitem__():154] config set model/num_parameters = 162843651 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7f46cb2f3cd0>>
39
+ 2025-05-04 13:55:49,986 INFO MainThread:31 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 162843651 None
40
+ 2025-05-04 13:55:50,199 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
41
+ 2025-05-04 13:55:50,200 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
42
+ 2025-05-04 13:56:37,949 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
43
+ 2025-05-04 13:56:37,961 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
44
+ 2025-05-04 13:56:37,962 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
45
+ 2025-05-04 13:56:45,173 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
46
+ 2025-05-04 13:56:45,177 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
47
+ 2025-05-04 13:56:45,178 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
48
+ 2025-05-04 13:56:48,022 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
49
+ 2025-05-04 13:56:48,026 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
50
+ 2025-05-04 13:56:48,026 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
51
+ 2025-05-04 13:56:52,773 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
52
+ 2025-05-04 13:56:52,778 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
53
+ 2025-05-04 13:56:52,778 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
54
+ 2025-05-04 13:56:56,160 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
55
+ 2025-05-04 13:56:56,164 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
56
+ 2025-05-04 13:56:56,164 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
57
+ 2025-05-04 13:57:14,975 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
58
+ 2025-05-04 13:57:14,977 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
59
+ 2025-05-04 13:57:14,977 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
60
+ 2025-05-04 13:57:19,728 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
61
+ 2025-05-04 13:57:19,730 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
62
+ 2025-05-04 13:57:19,730 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
63
+ 2025-05-04 13:57:29,105 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
64
+ 2025-05-04 13:57:29,198 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
65
+ 2025-05-04 13:57:29,198 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
66
+ 2025-05-04 13:57:45,425 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
67
+ 2025-05-04 13:57:45,599 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
68
+ 2025-05-04 13:57:45,599 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
69
+ 2025-05-04 13:57:49,345 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
70
+ 2025-05-04 13:57:49,368 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
71
+ 2025-05-04 13:57:49,368 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
72
+ 2025-05-04 13:57:52,873 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
73
+ 2025-05-04 13:57:53,030 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
74
+ 2025-05-04 13:57:53,030 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
75
+ 2025-05-04 13:57:54,287 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
76
+ 2025-05-04 13:57:54,306 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
77
+ 2025-05-04 13:57:54,306 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
78
+ 2025-05-04 13:57:57,558 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
79
+ 2025-05-04 13:57:57,561 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
80
+ 2025-05-04 13:57:57,561 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
81
+ 2025-05-04 13:58:08,677 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
82
+ 2025-05-04 13:58:08,680 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
83
+ 2025-05-04 13:58:08,680 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
84
+ 2025-05-04 13:58:08,952 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
85
+ 2025-05-04 13:58:09,202 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
86
+ 2025-05-04 13:58:09,202 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
87
+ 2025-05-04 13:58:09,233 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
88
+ 2025-05-04 13:58:09,245 INFO MainThread:31 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /kaggle/working/wandb/run-20250504_135809-y962z7x6/logs/debug.log
89
+ 2025-05-04 13:58:09,246 INFO MainThread:31 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /kaggle/working/wandb/run-20250504_135809-y962z7x6/logs/debug-internal.log
90
+ 2025-05-04 13:58:09,246 INFO MainThread:31 [wandb_init.py:init():756] calling init triggers
91
+ 2025-05-04 13:58:09,246 INFO MainThread:31 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
92
+ config: {'_wandb': {}}
93
+ 2025-05-04 13:58:09,246 INFO MainThread:31 [wandb_init.py:init():781] wandb.init() called when a run is still active
94
+ 2025-05-04 13:58:09,247 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
95
+ 2025-05-04 13:58:09,247 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
96
+ 2025-05-04 13:58:09,454 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
97
+ 2025-05-04 13:58:10,518 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
98
+ 2025-05-04 13:58:10,519 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
99
+ 2025-05-04 13:58:10,524 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
100
+ 2025-05-04 13:58:10,556 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
101
+ 2025-05-04 13:58:10,556 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
102
+ 2025-05-04 13:58:12,566 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
103
+ 2025-05-04 13:58:12,597 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
104
+ 2025-05-04 13:58:12,597 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
105
+ 2025-05-04 13:58:12,695 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
106
+ 2025-05-04 13:58:12,981 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
107
+ 2025-05-04 13:58:12,981 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
108
+ 2025-05-04 13:58:13,917 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
109
+ 2025-05-04 13:58:14,387 INFO MainThread:31 [wandb_run.py:_config_callback():1253] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'UBC-NLP/MARBERT', '_attn_implementation_autoset': True, 'transformers_version': '4.51.1', 'directionality': 'bidi', 'gradient_checkpointing': False, 'model_type': 'bert', 'pooler_fc_size': 768, 'pooler_num_attention_heads': 12, 'pooler_num_fc_layers': 3, 'pooler_size_per_head': 128, 'pooler_type': 'first_token_transform', 'vocab_size': 100000, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 
'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.2, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/May04_13-58-12_40834be6d3ef', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 5, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 5, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 
'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'macro_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'tp_size': 0, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False}
110
+ 2025-05-04 13:58:14,392 INFO MainThread:31 [wandb_config.py:__setitem__():154] config set model/num_parameters = 162843651 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7f46cb2f3cd0>>
111
+ 2025-05-04 13:58:14,392 INFO MainThread:31 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 162843651 None
112
+ 2025-05-04 13:58:14,417 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
113
+ 2025-05-04 13:58:14,417 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
114
+ 2025-05-04 13:58:23,249 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
115
+ 2025-05-04 13:58:23,253 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
116
+ 2025-05-04 13:58:23,253 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
117
+ 2025-05-04 13:58:26,528 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
118
+ 2025-05-04 13:58:26,532 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
119
+ 2025-05-04 13:58:26,532 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
120
+ 2025-05-04 13:58:28,017 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
121
+ 2025-05-04 13:58:28,021 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
122
+ 2025-05-04 13:58:28,022 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
123
+ 2025-05-04 13:58:29,518 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
124
+ 2025-05-04 13:58:29,522 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
125
+ 2025-05-04 13:58:29,523 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
126
+ 2025-05-04 13:58:37,472 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
127
+ 2025-05-04 13:58:37,475 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
128
+ 2025-05-04 13:58:37,475 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
129
+ 2025-05-04 13:58:39,093 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
130
+ 2025-05-04 13:58:39,095 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
131
+ 2025-05-04 13:58:39,096 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
132
+ 2025-05-04 13:58:40,783 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
133
+ 2025-05-04 13:58:40,786 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
134
+ 2025-05-04 13:58:40,786 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
135
+ 2025-05-04 13:58:44,494 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
136
+ 2025-05-04 13:58:44,498 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
137
+ 2025-05-04 13:58:44,498 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
138
+ 2025-05-04 13:59:50,771 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
139
+ 2025-05-04 13:59:50,775 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
140
+ 2025-05-04 13:59:50,775 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
141
+ 2025-05-04 14:00:22,549 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
142
+ 2025-05-04 14:00:22,755 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
143
+ 2025-05-04 14:00:22,755 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
144
+ 2025-05-04 14:00:23,634 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
145
+ 2025-05-04 14:00:23,637 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
146
+ 2025-05-04 14:00:23,637 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
147
+ 2025-05-04 14:00:27,785 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
148
+ 2025-05-04 14:00:27,788 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
149
+ 2025-05-04 14:00:27,788 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
150
+ 2025-05-04 14:00:28,083 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
151
+ 2025-05-04 14:00:28,306 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
152
+ 2025-05-04 14:00:28,306 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
153
+ 2025-05-04 14:00:29,409 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
154
+ 2025-05-04 14:00:29,421 INFO MainThread:31 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /kaggle/working/wandb/run-20250504_140029-gm7oud70/logs/debug.log
155
+ 2025-05-04 14:00:29,421 INFO MainThread:31 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /kaggle/working/wandb/run-20250504_140029-gm7oud70/logs/debug-internal.log
156
+ 2025-05-04 14:00:29,421 INFO MainThread:31 [wandb_init.py:init():756] calling init triggers
157
+ 2025-05-04 14:00:29,421 INFO MainThread:31 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
158
+ config: {'_wandb': {}}
159
+ 2025-05-04 14:00:29,422 INFO MainThread:31 [wandb_init.py:init():781] wandb.init() called when a run is still active
160
+ 2025-05-04 14:00:29,422 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
161
+ 2025-05-04 14:00:29,422 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
162
+ 2025-05-04 14:00:30,369 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
163
+ 2025-05-04 14:00:31,453 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
164
+ 2025-05-04 14:00:31,453 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
165
+ 2025-05-04 14:00:31,458 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
166
+ 2025-05-04 14:00:31,490 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
167
+ 2025-05-04 14:00:31,490 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
168
+ 2025-05-04 14:00:32,913 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
169
+ 2025-05-04 14:00:33,197 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
170
+ 2025-05-04 14:00:33,197 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
171
+ 2025-05-04 14:00:33,202 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
172
+ 2025-05-04 14:00:33,664 INFO MainThread:31 [wandb_run.py:_config_callback():1253] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'UBC-NLP/MARBERT', '_attn_implementation_autoset': True, 'transformers_version': '4.51.1', 'directionality': 'bidi', 'gradient_checkpointing': False, 'model_type': 'bert', 'pooler_fc_size': 768, 'pooler_num_attention_heads': 12, 'pooler_num_fc_layers': 3, 'pooler_size_per_head': 128, 'pooler_type': 'first_token_transform', 'vocab_size': 100000, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 
'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.2, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/May04_14-00-31_40834be6d3ef', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 5, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 5, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 
'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'macro_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'tp_size': 0, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False}
173
+ 2025-05-04 14:00:33,670 INFO MainThread:31 [wandb_config.py:__setitem__():154] config set model/num_parameters = 162843651 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7f46cb2f3cd0>>
174
+ 2025-05-04 14:00:33,670 INFO MainThread:31 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 162843651 None
wandb/run-20250504_135537-fqirr53c/run-fqirr53c.wandb ADDED
File without changes
wandb/run-20250504_135809-y962z7x6/logs/debug.log ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-05-04 13:58:09,245 INFO MainThread:31 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /kaggle/working/wandb/run-20250504_135809-y962z7x6/logs/debug.log
2
+ 2025-05-04 13:58:09,246 INFO MainThread:31 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /kaggle/working/wandb/run-20250504_135809-y962z7x6/logs/debug-internal.log
3
+ 2025-05-04 13:58:09,246 INFO MainThread:31 [wandb_init.py:init():756] calling init triggers
4
+ 2025-05-04 13:58:09,246 INFO MainThread:31 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
5
+ config: {'_wandb': {}}
6
+ 2025-05-04 13:58:09,246 INFO MainThread:31 [wandb_init.py:init():781] wandb.init() called when a run is still active
7
+ 2025-05-04 13:58:09,247 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
8
+ 2025-05-04 13:58:09,247 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
9
+ 2025-05-04 13:58:09,454 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
10
+ 2025-05-04 13:58:10,518 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
11
+ 2025-05-04 13:58:10,519 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
12
+ 2025-05-04 13:58:10,524 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
13
+ 2025-05-04 13:58:10,556 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
14
+ 2025-05-04 13:58:10,556 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
15
+ 2025-05-04 13:58:12,566 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
16
+ 2025-05-04 13:58:12,597 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
17
+ 2025-05-04 13:58:12,597 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
18
+ 2025-05-04 13:58:12,695 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
19
+ 2025-05-04 13:58:12,981 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
20
+ 2025-05-04 13:58:12,981 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
21
+ 2025-05-04 13:58:13,917 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
22
+ 2025-05-04 13:58:14,387 INFO MainThread:31 [wandb_run.py:_config_callback():1253] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'UBC-NLP/MARBERT', '_attn_implementation_autoset': True, 'transformers_version': '4.51.1', 'directionality': 'bidi', 'gradient_checkpointing': False, 'model_type': 'bert', 'pooler_fc_size': 768, 'pooler_num_attention_heads': 12, 'pooler_num_fc_layers': 3, 'pooler_size_per_head': 128, 'pooler_type': 'first_token_transform', 'vocab_size': 100000, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 
'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.2, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/May04_13-58-12_40834be6d3ef', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 5, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 5, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 
'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'macro_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'tp_size': 0, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False}
23
+ 2025-05-04 13:58:14,392 INFO MainThread:31 [wandb_config.py:__setitem__():154] config set model/num_parameters = 162843651 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7f46cb2f3cd0>>
24
+ 2025-05-04 13:58:14,392 INFO MainThread:31 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 162843651 None
25
+ 2025-05-04 13:58:14,417 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
26
+ 2025-05-04 13:58:14,417 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
27
+ 2025-05-04 13:58:23,249 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
28
+ 2025-05-04 13:58:23,253 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
29
+ 2025-05-04 13:58:23,253 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
30
+ 2025-05-04 13:58:26,528 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
31
+ 2025-05-04 13:58:26,532 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
32
+ 2025-05-04 13:58:26,532 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
33
+ 2025-05-04 13:58:28,017 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
34
+ 2025-05-04 13:58:28,021 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
35
+ 2025-05-04 13:58:28,022 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
36
+ 2025-05-04 13:58:29,518 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
37
+ 2025-05-04 13:58:29,522 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
38
+ 2025-05-04 13:58:29,523 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
39
+ 2025-05-04 13:58:37,472 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
40
+ 2025-05-04 13:58:37,475 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
41
+ 2025-05-04 13:58:37,475 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
42
+ 2025-05-04 13:58:39,093 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
43
+ 2025-05-04 13:58:39,095 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
44
+ 2025-05-04 13:58:39,096 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
45
+ 2025-05-04 13:58:40,783 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
46
+ 2025-05-04 13:58:40,786 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
47
+ 2025-05-04 13:58:40,786 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
48
+ 2025-05-04 13:58:44,494 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
49
+ 2025-05-04 13:58:44,498 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
50
+ 2025-05-04 13:58:44,498 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
51
+ 2025-05-04 13:59:50,771 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
52
+ 2025-05-04 13:59:50,775 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
53
+ 2025-05-04 13:59:50,775 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
54
+ 2025-05-04 14:00:22,549 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
55
+ 2025-05-04 14:00:22,755 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
56
+ 2025-05-04 14:00:22,755 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
57
+ 2025-05-04 14:00:23,634 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
58
+ 2025-05-04 14:00:23,637 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
59
+ 2025-05-04 14:00:23,637 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
60
+ 2025-05-04 14:00:27,785 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
61
+ 2025-05-04 14:00:27,788 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
62
+ 2025-05-04 14:00:27,788 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
63
+ 2025-05-04 14:00:28,083 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
64
+ 2025-05-04 14:00:28,306 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
65
+ 2025-05-04 14:00:28,306 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
66
+ 2025-05-04 14:00:29,409 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
67
+ 2025-05-04 14:00:29,421 INFO MainThread:31 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /kaggle/working/wandb/run-20250504_140029-gm7oud70/logs/debug.log
68
+ 2025-05-04 14:00:29,421 INFO MainThread:31 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /kaggle/working/wandb/run-20250504_140029-gm7oud70/logs/debug-internal.log
69
+ 2025-05-04 14:00:29,421 INFO MainThread:31 [wandb_init.py:init():756] calling init triggers
70
+ 2025-05-04 14:00:29,421 INFO MainThread:31 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
71
+ config: {'_wandb': {}}
72
+ 2025-05-04 14:00:29,422 INFO MainThread:31 [wandb_init.py:init():781] wandb.init() called when a run is still active
73
+ 2025-05-04 14:00:29,422 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
74
+ 2025-05-04 14:00:29,422 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
75
+ 2025-05-04 14:00:30,369 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
76
+ 2025-05-04 14:00:31,453 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
77
+ 2025-05-04 14:00:31,453 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
78
+ 2025-05-04 14:00:31,458 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
79
+ 2025-05-04 14:00:31,490 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
80
+ 2025-05-04 14:00:31,490 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
81
+ 2025-05-04 14:00:32,913 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
82
+ 2025-05-04 14:00:33,197 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
83
+ 2025-05-04 14:00:33,197 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
84
+ 2025-05-04 14:00:33,202 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
85
+ 2025-05-04 14:00:33,664 INFO MainThread:31 [wandb_run.py:_config_callback():1253] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'UBC-NLP/MARBERT', '_attn_implementation_autoset': True, 'transformers_version': '4.51.1', 'directionality': 'bidi', 'gradient_checkpointing': False, 'model_type': 'bert', 'pooler_fc_size': 768, 'pooler_num_attention_heads': 12, 'pooler_num_fc_layers': 3, 'pooler_size_per_head': 128, 'pooler_type': 'first_token_transform', 'vocab_size': 100000, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 
'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.2, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/May04_14-00-31_40834be6d3ef', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 5, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 5, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 
'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'macro_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'tp_size': 0, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False}
86
+ 2025-05-04 14:00:33,670 INFO MainThread:31 [wandb_config.py:__setitem__():154] config set model/num_parameters = 162843651 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7f46cb2f3cd0>>
87
+ 2025-05-04 14:00:33,670 INFO MainThread:31 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 162843651 None
wandb/run-20250504_140029-gm7oud70/logs/debug.log ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-05-04 14:00:29,421 INFO MainThread:31 [wandb_init.py:setup_run_log_directory():637] Logging user logs to /kaggle/working/wandb/run-20250504_140029-gm7oud70/logs/debug.log
2
+ 2025-05-04 14:00:29,421 INFO MainThread:31 [wandb_init.py:setup_run_log_directory():638] Logging internal logs to /kaggle/working/wandb/run-20250504_140029-gm7oud70/logs/debug-internal.log
3
+ 2025-05-04 14:00:29,421 INFO MainThread:31 [wandb_init.py:init():756] calling init triggers
4
+ 2025-05-04 14:00:29,421 INFO MainThread:31 [wandb_init.py:init():761] wandb.init called with sweep_config: {}
5
+ config: {'_wandb': {}}
6
+ 2025-05-04 14:00:29,422 INFO MainThread:31 [wandb_init.py:init():781] wandb.init() called when a run is still active
7
+ 2025-05-04 14:00:29,422 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
8
+ 2025-05-04 14:00:29,422 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
9
+ 2025-05-04 14:00:30,369 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
10
+ 2025-05-04 14:00:31,453 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
11
+ 2025-05-04 14:00:31,453 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
12
+ 2025-05-04 14:00:31,458 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
13
+ 2025-05-04 14:00:31,490 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
14
+ 2025-05-04 14:00:31,490 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
15
+ 2025-05-04 14:00:32,913 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
16
+ 2025-05-04 14:00:33,197 INFO MainThread:31 [jupyter.py:save_ipynb():386] not saving jupyter notebook
17
+ 2025-05-04 14:00:33,197 INFO MainThread:31 [wandb_init.py:_pause_backend():554] pausing backend
18
+ 2025-05-04 14:00:33,202 INFO MainThread:31 [wandb_init.py:_resume_backend():559] resuming backend
19
+ 2025-05-04 14:00:33,664 INFO MainThread:31 [wandb_run.py:_config_callback():1253] config_cb None None {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': 'float32', 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay_length_penalty': None, 'suppress_tokens': None, 'begin_suppress_tokens': None, 'architectures': ['BertForMaskedLM'], 'finetuning_task': None, 'id2label': {0: 'LABEL_0', 1: 'LABEL_1', 2: 'LABEL_2'}, 'label2id': {'LABEL_0': 0, 'LABEL_1': 1, 'LABEL_2': 2}, 'tokenizer_class': None, 'prefix': None, 'bos_token_id': None, 'pad_token_id': 0, 'eos_token_id': None, 'sep_token_id': None, 'decoder_start_token_id': None, 'task_specific_params': None, 'problem_type': None, '_name_or_path': 'UBC-NLP/MARBERT', '_attn_implementation_autoset': True, 'transformers_version': '4.51.1', 'directionality': 'bidi', 'gradient_checkpointing': False, 'model_type': 'bert', 'pooler_fc_size': 768, 'pooler_num_attention_heads': 12, 'pooler_num_fc_layers': 3, 'pooler_size_per_head': 128, 'pooler_type': 'first_token_transform', 'vocab_size': 100000, 'hidden_size': 768, 'num_hidden_layers': 12, 'num_attention_heads': 12, 'hidden_act': 'gelu', 'intermediate_size': 3072, 
'hidden_dropout_prob': 0.1, 'attention_probs_dropout_prob': 0.1, 'max_position_embeddings': 512, 'type_vocab_size': 2, 'initializer_range': 0.02, 'layer_norm_eps': 1e-12, 'position_embedding_type': 'absolute', 'use_cache': True, 'classifier_dropout': None, 'output_dir': '/kaggle/working/', 'overwrite_output_dir': False, 'do_train': False, 'do_eval': True, 'do_predict': False, 'eval_strategy': 'steps', 'prediction_loss_only': False, 'per_device_train_batch_size': 8, 'per_device_eval_batch_size': 8, 'per_gpu_train_batch_size': None, 'per_gpu_eval_batch_size': None, 'gradient_accumulation_steps': 2, 'eval_accumulation_steps': None, 'eval_delay': 0, 'torch_empty_cache_steps': None, 'learning_rate': 5e-05, 'weight_decay': 0.0, 'adam_beta1': 0.9, 'adam_beta2': 0.999, 'adam_epsilon': 1e-08, 'max_grad_norm': 1.0, 'num_train_epochs': 5, 'max_steps': -1, 'lr_scheduler_type': 'linear', 'lr_scheduler_kwargs': {}, 'warmup_ratio': 0.2, 'warmup_steps': 0, 'log_level': 'passive', 'log_level_replica': 'warning', 'log_on_each_node': True, 'logging_dir': '/kaggle/working/runs/May04_14-00-31_40834be6d3ef', 'logging_strategy': 'steps', 'logging_first_step': False, 'logging_steps': 500, 'logging_nan_inf_filter': True, 'save_strategy': 'steps', 'save_steps': 5, 'save_total_limit': 1, 'save_safetensors': True, 'save_on_each_node': False, 'save_only_model': False, 'restore_callback_states_from_checkpoint': False, 'no_cuda': False, 'use_cpu': False, 'use_mps_device': False, 'seed': 42, 'data_seed': None, 'jit_mode_eval': False, 'use_ipex': False, 'bf16': False, 'fp16': True, 'fp16_opt_level': 'O1', 'half_precision_backend': 'auto', 'bf16_full_eval': False, 'fp16_full_eval': False, 'tf32': None, 'local_rank': 0, 'ddp_backend': None, 'tpu_num_cores': None, 'tpu_metrics_debug': False, 'debug': [], 'dataloader_drop_last': False, 'eval_steps': 5, 'dataloader_num_workers': 0, 'dataloader_prefetch_factor': None, 'past_index': -1, 'run_name': '/kaggle/working/', 'disable_tqdm': False, 
'remove_unused_columns': True, 'label_names': None, 'load_best_model_at_end': True, 'metric_for_best_model': 'macro_f1', 'greater_is_better': True, 'ignore_data_skip': False, 'fsdp': [], 'fsdp_min_num_params': 0, 'fsdp_config': {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}, 'tp_size': 0, 'fsdp_transformer_layer_cls_to_wrap': None, 'accelerator_config': {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}, 'deepspeed': None, 'label_smoothing_factor': 0.0, 'optim': 'adamw_torch', 'optim_args': None, 'adafactor': False, 'group_by_length': False, 'length_column_name': 'length', 'report_to': ['wandb'], 'ddp_find_unused_parameters': None, 'ddp_bucket_cap_mb': None, 'ddp_broadcast_buffers': None, 'dataloader_pin_memory': True, 'dataloader_persistent_workers': False, 'skip_memory_metrics': True, 'use_legacy_prediction_loop': False, 'push_to_hub': True, 'resume_from_checkpoint': None, 'hub_model_id': None, 'hub_strategy': 'every_save', 'hub_token': '<HUB_TOKEN>', 'hub_private_repo': None, 'hub_always_push': False, 'gradient_checkpointing_kwargs': None, 'include_inputs_for_metrics': False, 'include_for_metrics': [], 'eval_do_concat_batches': True, 'fp16_backend': 'auto', 'push_to_hub_model_id': None, 'push_to_hub_organization': None, 'push_to_hub_token': '<PUSH_TO_HUB_TOKEN>', 'mp_parameters': '', 'auto_find_batch_size': False, 'full_determinism': False, 'torchdynamo': None, 'ray_scope': 'last', 'ddp_timeout': 1800, 'torch_compile': False, 'torch_compile_backend': None, 'torch_compile_mode': None, 'include_tokens_per_second': False, 'include_num_input_tokens_seen': False, 'neftune_noise_alpha': None, 'optim_target_modules': None, 'batch_eval_metrics': False, 'eval_on_start': False, 'use_liger_kernel': False, 'eval_use_gather_object': False, 'average_tokens_across_devices': False}
20
+ 2025-05-04 14:00:33,670 INFO MainThread:31 [wandb_config.py:__setitem__():154] config set model/num_parameters = 162843651 - <bound method Run._config_callback of <wandb.sdk.wandb_run.Run object at 0x7f46cb2f3cd0>>
21
+ 2025-05-04 14:00:33,670 INFO MainThread:31 [wandb_run.py:_config_callback():1253] config_cb model/num_parameters 162843651 None