hazyresearch/cartridge-wauoq23f

Training Configuration

# Root type marker: names the class `TrainConfig` in module `capsules.train`.
# The same `_config_type` / `_is_type` / `_module` / `_qualname` pattern is
# repeated on nested sections to name their classes.
_config_type:
  _is_type: true
  _module: capsules.train
  _qualname: TrainConfig
# Run identifiers and paths. As written here, `run_dir` equals
# `<output_dir>/<launch_id>/<run_id>`, and `launch_id` is a
# date-time-looking prefix followed by `script_id`.
run_dir: /data/sabri/capsules/2025-05-10-14-56-42-train_longhealth_simple/68e4c064-dc5a-46c8-a726-b3c7977e9e1a
output_dir: /data/sabri/capsules
run_id: 68e4c064-dc5a-46c8-a726-b3c7977e9e1a
launch_id: 2025-05-10-14-56-42-train_longhealth_simple
script_id: train_longhealth_simple
# Run name; the same string is used as `wandb.name`. The suffixes appear to
# mirror other values in this file (ten patient ids, `lr: 0.02`,
# `kv_cache_initializer.max_tokens: 2048`) -- TODO confirm naming scheme.
name: train_longhealth_simple_p10_lr0.02_toks2048
# Model section, typed as `capsules.config.HFModelConfig`.
model:
  _config_type:
    _is_type: true
    _module: capsules.config
    _qualname: HFModelConfig
  # No local checkpoint path is set; the model is identified by name below.
  checkpoint_path: null
  pretrained_model_name_or_path: meta-llama/Llama-3.2-3B-Instruct
  load_kwargs: {}
  # PEFT sub-section, typed as `capsules.config.PeftConfig`.
  # `enabled` is false; presumably the remaining fields are unused
  # defaults in this run -- TODO confirm against PeftConfig.
  peft:
    _config_type:
      _is_type: true
      _module: capsules.config
      _qualname: PeftConfig
    enabled: false
    method: lora
    r: 8
    alpha: 16
    dropout: 0.0
    bias: none
    task_type: CAUSAL_LM
    num_virtual_tokens: 20
    encoder_hidden_size: null
    prefix_projection: false
    prompt_tuning_init: null
    prompt_tuning_init_text: null
    encoder_reparameterization_type: MLP
    encoder_dropout: 0.0
    adapter_reduction_factor: 16
    adapter_non_linearity: relu
    target_modules: null
    extra_params: {}
  tuning_method: custom_prefix
  # Direct type reference (no `_config_type` wrapper): names the class
  # `LlamaForCausalLM` in the project module `capsules.models.llama`.
  model_cls:
    _is_type: true
    _module: capsules.models.llama
    _qualname: LlamaForCausalLM
  attn_implementation: einsum
# Weights & Biases section, typed as `capsules.utils.wandb.WandBConfig`.
wandb:
  _config_type:
    _is_type: true
    _module: capsules.utils.wandb
    _qualname: WandBConfig
  project: capsules
  entity: hazy-research
  # Same string as the top-level `name` key.
  name: train_longhealth_simple_p10_lr0.02_toks2048
  tags:
  - train
  - longhealth
  - patientsp10
  # No notes or group were set for this run.
  notes: null
  group: null
# Training dataset section. `_config_type` names
# `capsules.datasets.CapsuleDatasetLatest.Config`; `target` names the
# dataset class `capsules.datasets.CapsuleDatasetLatest` itself.
dataset:
  _config_type:
    _is_type: true
    _module: capsules.datasets
    _qualname: CapsuleDatasetLatest.Config
  target:
    _is_type: true
    _module: capsules.datasets
    _qualname: CapsuleDatasetLatest
  kwargs: {}
  # Two sources, each a two-element tuple of (reference string, null).
  # The two reference strings differ only in the `:v0` / `:v1` suffix.
  # NOTE(review): `!!python/tuple` is a Python-specific YAML tag; safe
  # loaders (e.g. PyYAML `yaml.safe_load`) do not accept it.
  # Meaning of the second tuple element is not visible here -- TODO confirm.
  data_sources:
  - !!python/tuple
    - hazy-research/capsules/generate_longhealth_simple_p10_s5_n65536:v0
    - null
  - !!python/tuple
    - hazy-research/capsules/generate_longhealth_simple_p10_s5_n65536:v1
    - null
  # Presumably marks `data_sources` as W&B references -- TODO confirm.
  is_wandb: true
  # Same value as the top-level `loss_type` key (`logits`).
  label_type: logits
  top_k_logits: 20
  dataset_weights: null
  user_prompt_prefix: null
  convo_transforms: null
  max_sequence_length: 1024
# Context section, typed as
# `capsules.tasks.longhealth.context.LongHealthStructuredContextConfig`.
# Lists ten patient ids, `patient_01` through `patient_10`; the same ten ids
# appear in the eval and generate dataset sections of this file.
context:
  _config_type:
    _is_type: true
    _module: capsules.tasks.longhealth.context
    _qualname: LongHealthStructuredContextConfig
  patient_ids:
  - patient_01
  - patient_02
  - patient_03
  - patient_04
  - patient_05
  - patient_06
  - patient_07
  - patient_08
  - patient_09
  - patient_10
# Evaluation settings. `eval_datasets` holds a single entry typed as
# `capsules.train.EvalDatasetConfig`.
eval_every_n_steps: 256
eval_datasets:
- _config_type:
    _is_type: true
    _module: capsules.train
    _qualname: EvalDatasetConfig
  local_batch_size: 16
  # Dataset for this entry: config class
  # `capsules.tasks.longhealth.LongHealthEvalDataset.Config`, target class
  # `capsules.tasks.longhealth.LongHealthEvalDataset`.
  dataset:
    _config_type:
      _is_type: true
      _module: capsules.tasks.longhealth
      _qualname: LongHealthEvalDataset.Config
    target:
      _is_type: true
      _module: capsules.tasks.longhealth
      _qualname: LongHealthEvalDataset
    kwargs: {}
    # Unlike the training dataset, no data sources are listed, `is_wandb`
    # is false, and `label_type` is `tokens` rather than `logits`.
    data_sources: []
    is_wandb: false
    label_type: tokens
    top_k_logits: 20
    dataset_weights: null
    user_prompt_prefix: null
    convo_transforms: null
    # Same ten patient ids as the top-level `context.patient_ids`.
    patient_ids:
    - patient_01
    - patient_02
    - patient_03
    - patient_04
    - patient_05
    - patient_06
    - patient_07
    - patient_08
    - patient_09
    - patient_10
    max_questions: 256
  # NOTE(review): the entry under `generate_datasets` uses the same
  # `name_for_wandb` value (`longhealth_mc`) -- confirm this is intended.
  name_for_wandb: longhealth_mc
  only_eval_rank_0: false
  dataloader_num_workers: 0
eval_log_table: true
eval_max_samples: null
# Generation settings. `generate_datasets` holds a single entry typed as
# `capsules.train.GenerateDatasetConfig`.
generate_every_n_steps: 512
generate_datasets:
- _config_type:
    _is_type: true
    _module: capsules.train
    _qualname: GenerateDatasetConfig
  # Dataset for this entry: config class
  # `LongHealthMultipleChoiceGenerateDataset.Config`, target class
  # `LongHealthMultipleChoiceGenerateDataset`, both in
  # `capsules.tasks.longhealth`.
  dataset:
    _config_type:
      _is_type: true
      _module: capsules.tasks.longhealth
      _qualname: LongHealthMultipleChoiceGenerateDataset.Config
    target:
      _is_type: true
      _module: capsules.tasks.longhealth
      _qualname: LongHealthMultipleChoiceGenerateDataset
    kwargs: {}
    # Same ten patient ids as the top-level `context.patient_ids`.
    patient_ids:
    - patient_01
    - patient_02
    - patient_03
    - patient_04
    - patient_05
    - patient_06
    - patient_07
    - patient_08
    - patient_09
    - patient_10
    # Null here, whereas the eval dataset sets `max_questions: 256`.
    max_questions: null
    include_diagnosis: true
    cot: true
  name_for_wandb: longhealth_mc
  dataloader_num_workers: 0
  # Two sample counts are given; presumably `num_samples_final` applies to
  # the final generation pass -- TODO confirm against GenerateDatasetConfig.
  num_samples: 4
  num_samples_final: 8
  temperature: 0.3
  batch_size: 16
  override_max_tokens: null
generate_max_new_tokens: 512
# Batching, tokenizer, device and optimizer settings.
# `global_batch_size` is 16x `local_batch_size`; how the difference is made
# up (accumulation vs. number of processes) is not visible here.
global_batch_size: 64
local_batch_size: 4
use_batch_sampler: false
# NOTE(review): the tokenizer names the 1B-Instruct checkpoint while
# `model.pretrained_model_name_or_path` names the 3B-Instruct checkpoint --
# confirm this is intentional.
tokenizer: meta-llama/Llama-3.2-1B-Instruct
epochs: 2
device: cuda
# NOTE(review): `gloo` is configured together with `device: cuda` --
# confirm this backend choice is intended.
distributed_backend: gloo
optimizer: adam
# Matches the `lr0.02` fragment of the run name; no scheduler is set.
lr: 0.02
lr_scheduler: null
# KV-cache initializer section. `_config_type` names
# `KVCacheInitFromFirstNTokensOfContext.Config` and `target` names
# `KVCacheInitFromFirstNTokensOfContext`, both in
# `capsules.kv_initialization.strategies.first_n_tokens`.
kv_cache_initializer:
  _config_type:
    _is_type: true
    _module: capsules.kv_initialization.strategies.first_n_tokens
    _qualname: KVCacheInitFromFirstNTokensOfContext.Config
  target:
    _is_type: true
    _module: capsules.kv_initialization.strategies.first_n_tokens
    _qualname: KVCacheInitFromFirstNTokensOfContext
  kwargs: {}
  num_frozen_tokens: 1
  # Matches the `toks2048` fragment of the run name.
  max_tokens: 2048
  # Null in this section; a separate top-level `context` key exists.
  # Whether that one is used instead is not visible here -- TODO confirm.
  context: null
# Remaining top-level settings: cache loading, loss, checkpointing, seed.
pretrained_cache_path: null
# Same value as `dataset.label_type` (`logits`).
loss_type: logits
# Same step interval as `generate_every_n_steps` (512).
save_every_n_steps: 512
save_after_training: true
keep_last_n_saved: 1
save_to_wandb: true
online_model: true
# `ema_cache` is false; presumably `cache_ema_alpha` is unused in this
# run -- TODO confirm.
ema_cache: false
cache_ema_alpha: 0.9
# NOTE(review): -1 presumably means "no step limit" -- confirm in TrainConfig.
max_optimizer_steps: -1
seed: 42
log_logprob_viz: false