Overview
A T5 v1.1 Base model fine-tuned to generate hypotheses given a premise and a label. The settings used to train it are reported below.
Experiment configurations
├── datasets
│   └── mnli_train:
│         dataset_name: multi_nli
│         dataset_config_name: null
│         cache_dir: null
│         input_fields:
│         - premise
│         - hypothesis
│         target_field: label
│         train_subset_names: null
│         val_subset_names: validation_matched
│         test_subset_names: none
│         train_val_split: null
│         limit_train_samples: null
│         limit_val_samples: null
│         limit_test_samples: null
│         sampling_kwargs:
│           sampling_strategy: random
│           seed: 42
│           replace: false
│         align_labels_with_mapping: null
│         avoid_consistency_check: false
│         predict_label_mapping: null
│       mnli:
│         dataset_name: multi_nli
│         dataset_config_name: null
│         cache_dir: null
│         input_fields:
│         - premise
│         - hypothesis
│         target_field: label
│         train_subset_names: none
│         val_subset_names: none
│         test_subset_names: validation_mismatched
│         train_val_split: null
│         limit_train_samples: null
│         limit_val_samples: null
│         limit_test_samples: null
│         sampling_kwargs:
│           sampling_strategy: random
│           seed: 42
│           replace: false
│         align_labels_with_mapping: null
│         avoid_consistency_check: false
│         predict_label_mapping: null
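Both entries above point at MultiNLI: `mnli_train` provides the training data and the matched validation split, while `mnli` holds out the mismatched validation split for testing. A minimal sketch of loading the same splits with the Hugging Face `datasets` library:

```python
from datasets import load_dataset

# MultiNLI as configured above: matched validation for model selection,
# mismatched validation held out as the test set.
mnli = load_dataset("multi_nli")

train_set = mnli["train"]                  # premise / hypothesis / label columns
val_set = mnli["validation_matched"]       # mnli_train: val_subset_names
test_set = mnli["validation_mismatched"]   # mnli: test_subset_names

print(train_set[0]["premise"], train_set[0]["label"])
```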
├── data
│   └── _target_: src.task.nli.data.NLIGenerationData.from_config
│       main_dataset_name: null
│       use_additional_as_test: null
│       dataloader:
│         batch_size: 64
│         eval_batch_size: 100
│         num_workers: 16
│         pin_memory: true
│         drop_last: false
│         persistent_workers: false
│         shuffle: true
│         seed_dataloader: 42
│         replacement: false
│       processing:
│         preprocessing_num_workers: 16
│         preprocessing_batch_size: 1000
│         load_from_cache_file: true
│         padding: longest
│         truncation: longest_first
│         max_source_length: 128
│         max_target_length: 128
│         template: 'premise: $premise $label hypothesis: '
│       tokenizer:
│         _target_: transformers.AutoTokenizer.from_pretrained
│         pretrained_model_name_or_path: google/t5-v1_1-base
│         use_fast: true
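The `processing` block defines the prompt format: the premise and a verbalized label are filled into the template `'premise: $premise $label hypothesis: '` as the encoder input, and the hypothesis is the decoder target, with source and target truncated to 128 tokens. A rough sketch of this preprocessing, assuming MultiNLI's integer labels are verbalized as entailment/neutral/contradiction (the exact verbalizer lives in the training code, not in this card):

```python
from string import Template
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("google/t5-v1_1-base", use_fast=True)

# Template taken from the processing config above.
template = Template("premise: $premise $label hypothesis: ")

# Assumed label verbalizer (MultiNLI uses integer labels 0/1/2).
label_words = {0: "entailment", 1: "neutral", 2: "contradiction"}

def preprocess(example):
    source = template.substitute(
        premise=example["premise"], label=label_words[example["label"]]
    )
    model_inputs = tokenizer(
        source,
        max_length=128,              # max_source_length
        truncation="longest_first",
        padding="longest",
    )
    target = tokenizer(
        example["hypothesis"],       # the hypothesis is the generation target
        max_length=128,              # max_target_length
        truncation=True,
    )
    model_inputs["labels"] = target["input_ids"]
    return model_inputs
```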
├── task
│   └── optimizer:
│         name: Adafactor
│         lr: 0.001
│         weight_decay: 0.0
│         no_decay:
│         - bias
│         - LayerNorm.weight
│         decay_rate: -0.8
│         clip_threshold: 1.0
│         relative_step: false
│         scale_parameter: false
│         warmup_init: false
│       scheduler:
│         name: constant_schedule
│       model:
│         model_name_or_path: google/t5-v1_1-base
│         checkpoint_path: null
│         freeze: false
│         seed_init_weight: 42
│       _target_: src.task.nli.NLIGenerationTask.from_config
│       generation:
│         max_length: 128
│         min_length: 3
│         do_sample: true
│         early_stopping: false
│         num_beams: 1
│         temperature: 1.0
│         top_k: 50
│         top_p: 0.95
│         repetition_penalty: null
│         length_penalty: null
│         no_repeat_ngram_size: null
│         encoder_no_repeat_ngram_size: null
│         num_return_sequences: 1
│         max_time: null
│         max_new_tokens: null
│         decoder_start_token_id: null
│         use_cache: null
│         num_beam_groups: null
│         diversity_penalty: null
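The optimizer block corresponds to the `Adafactor` implementation shipped with `transformers`, run with a fixed learning rate (relative step sizes, parameter scaling, and warmup disabled) and a constant schedule. A minimal sketch; since `weight_decay` is 0.0 here, the `no_decay` parameter groups have no practical effect and are omitted:

```python
from transformers import Adafactor, AutoModelForSeq2SeqLM, get_constant_schedule

model = AutoModelForSeq2SeqLM.from_pretrained("google/t5-v1_1-base")

# Adafactor with an explicit learning rate, as configured above.
optimizer = Adafactor(
    model.parameters(),
    lr=1e-3,
    weight_decay=0.0,
    decay_rate=-0.8,
    clip_threshold=1.0,
    relative_step=False,
    scale_parameter=False,
    warmup_init=False,
)
scheduler = get_constant_schedule(optimizer)  # scheduler: constant_schedule
```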
├── trainer
│   └── _target_: pytorch_lightning.Trainer
│       callbacks:
│         lr_monitor:
│           _target_: pytorch_lightning.callbacks.LearningRateMonitor
│           logging_interval: step
│           log_momentum: false
│         model_checkpoint:
│           _target_: pytorch_lightning.callbacks.ModelCheckpoint
│           dirpath: ./checkpoints/
│           filename: nli_generator_mnli-epoch={epoch:02d}-val_loss={val/aggregated_loss:.2f}
│           monitor: val/aggregated_loss
│           mode: min
│           verbose: false
│           save_last: true
│           save_top_k: 1
│           auto_insert_metric_name: false
│           save_on_train_epoch_end: false
│         rich_model_summary:
│           _target_: pytorch_lightning.callbacks.RichModelSummary
│           max_depth: 1
│         log_grad_norm:
│           _target_: src.core.callbacks.LogGradNorm
│           norm_type: 2
│           group_separator: /
│           only_total: true
│           on_step: true
│           on_epoch: false
│           prog_bar: true
│         log_generated_text:
│           _target_: src.core.callbacks.GenerateAndLogText
│           dirpath: ./generated_text
│           type: generated_text
│           pop_keys_after_logging: true
│           on_train: false
│           on_validation: false
│           on_test: true
│           log_to_wandb: true
│         wandb_log_dataset_sizes:
│           _target_: src.core.callbacks.WandbLogDatasetSizes
│       logger:
│         wandb:
│           _target_: pytorch_lightning.loggers.WandbLogger
│           project: nli_debiasing
│           entity: team_brushino
│           name: nli_generator_mnli
│           save_dir: ./
│           offline: false
│           log_model: false
│           group: mnli
│           job_type: generator
│           tags:
│           - nli_generator_mnli
│           - seed=42
│           - seed_dataloader=42
│           notes: nli_generator_mnli_time=02-24-53
│       enable_checkpointing: true
│       enable_progress_bar: true
│       enable_model_summary: true
│       gradient_clip_val: 0.0
│       gradient_clip_algorithm: null
│       accelerator: gpu
│       devices: auto
│       gpus: null
│       auto_select_gpus: true
│       accumulate_grad_batches: 1
│       max_epochs: 3
│       min_epochs: 1
│       max_steps: -1
│       min_steps: null
│       max_time: null
│       num_sanity_val_steps: 2
│       overfit_batches: 0.0
│       fast_dev_run: false
│       limit_train_batches: 1.0
│       limit_val_batches: 1.0
│       limit_test_batches: 1.0
│       profiler: null
│       detect_anomaly: false
│       deterministic: false
│       check_val_every_n_epoch: 1
│       val_check_interval: 0.1
│       log_every_n_steps: 10
│       move_metrics_to_cpu: false
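The trainer block instantiates a standard `pytorch_lightning.Trainer`. A stripped-down sketch with the built-in callbacks and logger only (the `src.core.callbacks.*` entries are project-specific and omitted here):

```python
import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint, RichModelSummary
from pytorch_lightning.loggers import WandbLogger

checkpoint_cb = ModelCheckpoint(
    dirpath="./checkpoints/",
    filename="nli_generator_mnli-epoch={epoch:02d}-val_loss={val/aggregated_loss:.2f}",
    monitor="val/aggregated_loss",
    mode="min",
    save_last=True,
    save_top_k=1,
    auto_insert_metric_name=False,
    save_on_train_epoch_end=False,
)

trainer = pl.Trainer(
    accelerator="gpu",
    devices="auto",
    max_epochs=3,
    min_epochs=1,
    accumulate_grad_batches=1,
    gradient_clip_val=0.0,
    val_check_interval=0.1,       # validate every 10% of a training epoch
    check_val_every_n_epoch=1,
    num_sanity_val_steps=2,
    log_every_n_steps=10,
    callbacks=[
        LearningRateMonitor(logging_interval="step"),
        checkpoint_cb,
        RichModelSummary(max_depth=1),
    ],
    logger=WandbLogger(
        project="nli_debiasing", name="nli_generator_mnli", group="mnli", job_type="generator"
    ),
)
```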
└── training
    └── run_val_before_fit: false
        run_val_after_fit: false
        run_test_before_fit: false
        run_test_after_fit: true
        lr: 0.001
        seed: 42
        show_batch: false
        batch_size: 64
        eval_batch_size: 100
        num_workers: 16
        pin_memory: true
        drop_last: false
        persistent_workers: false
        shuffle: true
        seed_dataloader: 42
        ignore_warnings: true
        experiment_name: nli_generator_mnli
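Putting the pieces together, a hedged usage sketch for generating a hypothesis from a premise and a label with this checkpoint; the repository id below is a placeholder for this model's Hub id, and the label verbalization is an assumption (see the training code for the exact mapping):

```python
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model_id = "<this-model-repo>"  # placeholder: replace with this model's Hub id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

premise = "A soccer game with multiple males playing."
label = "entailment"  # assumed verbalization of the target label

# Same template and sampling settings as in the configuration above.
inputs = tokenizer(f"premise: {premise} {label} hypothesis: ", return_tensors="pt")
outputs = model.generate(
    **inputs, do_sample=True, top_k=50, top_p=0.95, max_length=128, min_length=3
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```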