noise:
  type: loglinear
  sigma_min: 1e-4
  sigma_max: 20
  state_dependent: True

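# Top-level run settings: perplexity evaluation of an absorbing-state (masked)
# diffusion model with the SUBS parameterization and a RoFormer backbone.
# T: 0 presumably selects the continuous-time formulation.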
mode: ppl_eval
diffusion: absorbing_state
vocab: old_smiles
backbone: roformer
parameterization: subs
time_conditioning: False
T: 0
subs_masking: False

seed: 42

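# Monte Carlo tree search over partially masked sequences: each expansion
# proposes num_children candidates scored against num_objectives objectives.
# mask_token is presumably the tokenizer id of the mask token, and
# invalid_penalty the score penalty for invalid sequences.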
mcts:
  num_children: 50
  num_objectives: 5
  topk: 100
  mask_token: 4
  num_iter: 128
  sampling: 0
  invalid_penalty: 0.5
  sample_prob: 1.0
  perm: True
  dual: False
  single: False
  time_dependent: True

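# Hugging Face scheduler: linear warmup over num_warmup_steps optimizer steps,
# then a constant learning rate.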
lr_scheduler:
  _target_: transformers.get_constant_schedule_with_warmup
  num_warmup_steps: 2500

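# Fine-tuning splits; 'wrapping' batching presumably packs sequences into
# fixed-length chunks instead of padding each example.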
data:
  train: /home/st512/peptune/scripts/peptide-mdlm-mcts/data/finetune2/30K-train.csv
  valid: /home/st512/peptune/scripts/peptide-mdlm-mcts/data/finetune2/30K-val.csv
  batching: wrapping

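# Per-device batch sizes are derived from the global batch size with the custom
# div_up (ceiling division) and eval resolvers that the training code registers
# with OmegaConf. Example: 2 GPUs on 1 node give batch_size = div_up(64, 2) = 32.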
loader:
  global_batch_size: 64
  eval_global_batch_size: ${.global_batch_size}
  batch_size: ${div_up:${.global_batch_size}, ${eval:${trainer.devices} * ${trainer.num_nodes}}}
  eval_batch_size: ${div_up:${.eval_global_batch_size}, ${eval:${trainer.devices} * ${trainer.num_nodes}}}
  num_workers: ${eval:"len(__import__('os').sched_getaffinity(0))"}
  pin_memory: True

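# Generation settings: the cached DDPM-style predictor runs `steps` denoising
# steps per batch of seq_length-token sequences; noise_removal presumably forces
# any tokens still masked at the final step to be resolved.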
sampling:
  predictor: ddpm_cache
  num_sequences: 100
  sampling_eps: 1e-3
  steps: 128
  seq_length: 100
  noise_removal: True
  num_sample_batches: 2
  num_sample_log: 2
  stride_length: 1
  num_strides: 1

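# Antithetic timestep sampling is a variance-reduction trick for the diffusion
# loss estimate.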
training:
  antithetic_sampling: True
  sampling_eps: 1e-3
  focus_mask: False

accumulator: False

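# Evaluation loads the checkpoint below; generative perplexity (scored with
# gpt2-large) is switched off here.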
eval:
  checkpoint_path: /home/st512/peptune/scripts/peptide-mdlm-mcts/checkpoints/11M-old-tokenizer/epoch=10-step=156276.ckpt
  disable_ema: False
  compute_generative_perplexity: False
  perplexity_batch_size: 8
  compute_perplexity_on_sanity: False
  gen_ppl_eval_model_name_or_path: gpt2-large
  generate_samples: True
  generation_model: /home/st512/peptune/scripts/peptide-mdlm-mcts/checkpoints/11M-old-tokenizer/

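# Adam/AdamW-style optimizer hyperparameters.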
optim:
  weight_decay: 0.075
  lr: 3e-4
  beta1: 0.9
  beta2: 0.999
  eps: 1e-8

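# Backbone hyperparameters; only the section matching `backbone` above
# (roformer here) should take effect.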
pepclm:
  hidden_size: 768
  cond_dim: 256
  n_heads: 20
  n_blocks: 4
  dropout: 0.5
  length: 512

model:
  type: ddit
  hidden_size: 768
  cond_dim: 128
  length: 512
  n_blocks: 12
  n_heads: 12
  scale_by_sigma: True
  dropout: 0.1

roformer:
  hidden_size: 768
  n_layers: 8
  n_heads: 8
  max_position_embeddings: 1035

helmgpt:
  hidden_size: 256
  embd_pdrop: 0.1
  resid_pdrop: 0.1
  attn_pdrop: 0.1
  ff_dropout: 0.
  block_size: 140
  n_layer: 8
  n_heads: 8

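# Lightning Trainer, instantiated by Hydra via _target_. accumulate_grad_batches
# rounds up global_batch_size / (devices * batch_size * num_nodes); e.g. 2 GPUs
# on 1 node give batch_size = 32 and div_up(64, 2 * 32 * 1) = 1, i.e. no
# accumulation. precision: 64-true runs everything in full float64.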
trainer:
  _target_: lightning.Trainer
  accelerator: cuda
  num_nodes: 1
  devices: ${device_count:}
  accumulate_grad_batches: ${div_up:${loader.global_batch_size}, ${eval:${trainer.devices} * ${loader.batch_size} * ${trainer.num_nodes}}}
  gradient_clip_val: 1.0
  precision: 64-true
  num_sanity_val_steps: 2
  max_epochs: 100
  max_steps: 1_000_000
  log_every_n_steps: 10
  limit_train_batches: 1.0
  limit_val_batches: 1.0
  check_val_every_n_epoch: 1

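# Weights & Biases logging; the run id interpolates `name`, yielding
# sophia-tang_nov12_set2.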
wandb:
  project: peptune
  notes: null
  group: null
  job_type: null
  name: sophia-tang
  id: ${.name}_nov12_set2

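# Hydra writes each run into a date-stamped directory and chdirs into it, so
# relative paths resolve inside the run directory.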
hydra:
  run:
    dir: ./${now:%Y.%m.%d}/
  job:
    chdir: True

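# ${cwd:} is presumably a custom resolver returning the launch directory.
# Training resumes from the epoch-7 checkpoint below.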
checkpointing:
  save_dir: ${cwd:}
  resume_from_ckpt: True
  resume_ckpt_path: /home/st512/peptune/scripts/peptide-mdlm-mcts/checkpoints/11M-old-tokenizer/epoch=7-step=108225.ckpt

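# Keep the 10 best checkpoints by validation NLL, checked once per epoch.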
callbacks:
  model_checkpoint:
    _target_: pytorch_lightning.callbacks.ModelCheckpoint
    every_n_epochs: 1
    monitor: "val/nll"
    save_top_k: 10
    mode: "min"
    dirpath: '/home/st512/peptune/scripts/peptide-mdlm-mcts/checkpoints/11M-old-tokenizer'