Training Configuration
_config_type:
_is_type: true
_module: capsules.train
_qualname: TrainConfig
run_dir: /data/sabri/capsules/2025-05-10-14-56-42-train_longhealth_simple/68e4c064-dc5a-46c8-a726-b3c7977e9e1a
output_dir: /data/sabri/capsules
run_id: 68e4c064-dc5a-46c8-a726-b3c7977e9e1a
launch_id: 2025-05-10-14-56-42-train_longhealth_simple
script_id: train_longhealth_simple
name: train_longhealth_simple_p10_lr0.02_toks2048
model:
_config_type:
_is_type: true
_module: capsules.config
_qualname: HFModelConfig
checkpoint_path: null
pretrained_model_name_or_path: meta-llama/Llama-3.2-3B-Instruct
load_kwargs: {}
peft:
_config_type:
_is_type: true
_module: capsules.config
_qualname: PeftConfig
enabled: false
method: lora
r: 8
alpha: 16
dropout: 0.0
bias: none
task_type: CAUSAL_LM
num_virtual_tokens: 20
encoder_hidden_size: null
prefix_projection: false
prompt_tuning_init: null
prompt_tuning_init_text: null
encoder_reparameterization_type: MLP
encoder_dropout: 0.0
adapter_reduction_factor: 16
adapter_non_linearity: relu
target_modules: null
extra_params: {}
tuning_method: custom_prefix
model_cls:
_is_type: true
_module: capsules.models.llama
_qualname: LlamaForCausalLM
attn_implementation: einsum
wandb:
_config_type:
_is_type: true
_module: capsules.utils.wandb
_qualname: WandBConfig
project: capsules
entity: hazy-research
name: train_longhealth_simple_p10_lr0.02_toks2048
tags:
- train
- longhealth
- patientsp10
notes: null
group: null
dataset:
_config_type:
_is_type: true
_module: capsules.datasets
_qualname: CapsuleDatasetLatest.Config
target:
_is_type: true
_module: capsules.datasets
_qualname: CapsuleDatasetLatest
kwargs: {}
data_sources:
- !!python/tuple
- hazy-research/capsules/generate_longhealth_simple_p10_s5_n65536:v0
- null
- !!python/tuple
- hazy-research/capsules/generate_longhealth_simple_p10_s5_n65536:v1
- null
is_wandb: true
label_type: logits
top_k_logits: 20
dataset_weights: null
user_prompt_prefix: null
convo_transforms: null
max_sequence_length: 1024
context:
_config_type:
_is_type: true
_module: capsules.tasks.longhealth.context
_qualname: LongHealthStructuredContextConfig
patient_ids:
- patient_01
- patient_02
- patient_03
- patient_04
- patient_05
- patient_06
- patient_07
- patient_08
- patient_09
- patient_10
eval_every_n_steps: 256
eval_datasets:
- _config_type:
_is_type: true
_module: capsules.train
_qualname: EvalDatasetConfig
local_batch_size: 16
dataset:
_config_type:
_is_type: true
_module: capsules.tasks.longhealth
_qualname: LongHealthEvalDataset.Config
target:
_is_type: true
_module: capsules.tasks.longhealth
_qualname: LongHealthEvalDataset
kwargs: {}
data_sources: []
is_wandb: false
label_type: tokens
top_k_logits: 20
dataset_weights: null
user_prompt_prefix: null
convo_transforms: null
patient_ids:
- patient_01
- patient_02
- patient_03
- patient_04
- patient_05
- patient_06
- patient_07
- patient_08
- patient_09
- patient_10
max_questions: 256
name_for_wandb: longhealth_mc
only_eval_rank_0: false
dataloader_num_workers: 0
eval_log_table: true
eval_max_samples: null
generate_every_n_steps: 512
generate_datasets:
- _config_type:
_is_type: true
_module: capsules.train
_qualname: GenerateDatasetConfig
dataset:
_config_type:
_is_type: true
_module: capsules.tasks.longhealth
_qualname: LongHealthMultipleChoiceGenerateDataset.Config
target:
_is_type: true
_module: capsules.tasks.longhealth
_qualname: LongHealthMultipleChoiceGenerateDataset
kwargs: {}
patient_ids:
- patient_01
- patient_02
- patient_03
- patient_04
- patient_05
- patient_06
- patient_07
- patient_08
- patient_09
- patient_10
max_questions: null
include_diagnosis: true
cot: true
name_for_wandb: longhealth_mc
dataloader_num_workers: 0
num_samples: 4
num_samples_final: 8
temperature: 0.3
batch_size: 16
override_max_tokens: null
generate_max_new_tokens: 512
global_batch_size: 64
local_batch_size: 4
use_batch_sampler: false
tokenizer: meta-llama/Llama-3.2-1B-Instruct
epochs: 2
device: cuda
distributed_backend: gloo
optimizer: adam
lr: 0.02
lr_scheduler: null
kv_cache_initializer:
_config_type:
_is_type: true
_module: capsules.kv_initialization.strategies.first_n_tokens
_qualname: KVCacheInitFromFirstNTokensOfContext.Config
target:
_is_type: true
_module: capsules.kv_initialization.strategies.first_n_tokens
_qualname: KVCacheInitFromFirstNTokensOfContext
kwargs: {}
num_frozen_tokens: 1
max_tokens: 2048
context: null
pretrained_cache_path: null
loss_type: logits
save_every_n_steps: 512
save_after_training: true
keep_last_n_saved: 1
save_to_wandb: true
online_model: true
ema_cache: false
cache_ema_alpha: 0.9
max_optimizer_steps: -1
seed: 42
log_logprob_viz: false
Downloads last month: 45
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
🙋 Ask for provider support