Yinuo Zhang committed

Commit d79b4f8 · Parent(s): 724b6d6

Add large file using Git LFS
- configs/config.yaml +171 -0
- configs/path.yaml +7 -0
- main.py +27 -26
- peptune-pretrained.ckpt +3 -0
    	
configs/config.yaml
ADDED

@@ -0,0 +1,171 @@
+defaults:
+  - path
+
+noise:
+  type: loglinear
+  sigma_min: 1e-4
+  sigma_max: 20
+  state_dependent: True
+
+mode: ppl_eval  # train / ppl_eval / sample_eval
+diffusion: absorbing_state
+vocab: old_smiles # old_smiles / new_smiles / selfies / helm
+backbone: roformer  # peptideclm / helmgpt / dit / roformer / finetune_roformer
+parameterization: subs  # subs
+time_conditioning: False
+T: 0  # 0 (continuous time) / 1000
+subs_masking: False
+
+seed: 42
+
+mcts:
+  num_children: 50
+  num_objectives: 5
+  topk: 100
+  mask_token: 4
+  num_iter: 128
+  sampling: 0 # 0 is gumbel sampling / > 0 samples children from top k probs
+  invalid_penalty: 0.5
+  sample_prob: 1.0
+  perm: True
+  dual: False
+  single: False
+  time_dependent: True
+
+lr_scheduler:
+  _target_: transformers.get_constant_schedule_with_warmup
+  num_warmup_steps: 2500
+
+data:
+  train: ${paths.data}/finetune2/30K-train.csv
+  valid: ${paths.data}/finetune2/30K-val.csv
+  batching: wrapping # padding / wrapping
+
+loader:
+  global_batch_size: 64
+  eval_global_batch_size: ${.global_batch_size}
+  # Note: batch_size and eval_batch_size are **per machine**
+  batch_size: ${div_up:${.global_batch_size}, ${eval:${trainer.devices} * ${trainer.num_nodes}}}
+  eval_batch_size: ${div_up:${.eval_global_batch_size}, ${eval:${trainer.devices} * ${trainer.num_nodes}}}
+  num_workers: ${eval:"len(__import__('os').sched_getaffinity(0))"}
+  pin_memory: True
+
+sampling:
+  predictor: ddpm_cache  # analytic, ddpm, ddpm_cache
+  num_sequences: 100
+  sampling_eps: 1e-3
+  steps: 128
+  seq_length: 100
+  noise_removal: True
+  num_sample_batches: 2  # Total samples: `num_gpus` * `loader.eval_batch_size` * num_sample_batches
+  num_sample_log: 2
+  stride_length: 1
+  num_strides: 1
+
+training:
+  antithetic_sampling: True
+  sampling_eps: 1e-3
+  focus_mask: False
+  #dynamic_batching: True
+  accumulator: False
+
+eval:
+  checkpoint_path: ${paths.checkpoints}/11M-old-tokenizer/epoch=10-step=156276.ckpt
+  disable_ema: False
+  compute_generative_perplexity: False
+  perplexity_batch_size: 8
+  compute_perplexity_on_sanity: False
+  gen_ppl_eval_model_name_or_path: gpt2-large  # gpt2-large, meta-llama/Llama-2-7b-hf
+  generate_samples: True
+  generation_model: ${paths.checkpoints}/11M-old-tokenizer/
+
+optim:
+  weight_decay: 0.075
+  lr: 3e-4
+  beta1: 0.9
+  beta2: 0.999
+  eps: 1e-8
+
+pepclm:
+  hidden_size: 768
+  cond_dim: 256
+  n_heads: 20
+  n_blocks: 4
+  dropout: 0.5
+  length: 512
+  #scale_by_sigma: True
+
+model:
+  type: ddit
+  hidden_size: 768
+  cond_dim: 128
+  length: 512
+  n_blocks: 12
+  n_heads: 12
+  scale_by_sigma: True
+  dropout: 0.1
+
+roformer:
+  hidden_size: 768
+  n_layers: 8
+  n_heads: 8
+  max_position_embeddings: 1035
+
+helmgpt:
+  hidden_size: 256
+  embd_pdrop: 0.1
+  resid_pdrop: 0.1
+  attn_pdrop: 0.1
+  ff_dropout: 0.
+  block_size: 140
+  n_layer: 8
+  n_heads: 8
+
+
+trainer:
+  _target_: lightning.Trainer
+  accelerator: cuda
+  num_nodes: 1
+  devices: ${device_count:}
+  accumulate_grad_batches: ${div_up:${loader.global_batch_size}, ${eval:${trainer.devices} * ${loader.batch_size} * ${trainer.num_nodes}}}
+  gradient_clip_val: 1.0
+  precision: 64-true
+  num_sanity_val_steps: 2
+  max_epochs: 100
+  max_steps: 1_000_000
+  log_every_n_steps: 10
+  limit_train_batches: 1.0   # train on full dataset, can be used to toggle quick run
+  limit_val_batches: 1.0     # validate on full dataset, can be used to toggle quick run
+  #val_check_interval: 40 #954
+  check_val_every_n_epoch: 1
+
+
+wandb:
+  project: ${env_or:WANDB_PROJECT,peptune}
+  notes: null
+  group: null
+  job_type: null
+  name: ${env_or:WANDB_RUN_NAME,local}
+  id: ${.name}
+
+hydra:
+  run:
+    dir: ./${now:%Y.%m.%d}/
+  job:
+    chdir: True
+
+checkpointing:
+  # Use custom `save_dir` if, e.g., saving to S3 bucket, otherwise leave this parameter as is
+  save_dir: ${paths.outputs}
+  # Note: `checkpoints` path should correspond to `checkpoint_every_n_steps.dirpath`
+  resume_from_ckpt: True
+  resume_ckpt_path: ${paths.checkpoints}/last.ckpt
+
+callbacks:
+  model_checkpoint:
+    _target_: pytorch_lightning.callbacks.ModelCheckpoint
+    every_n_epochs: 1
+    monitor: "val/nll"
+    save_top_k: 10
+    mode: "min"
+    dirpath: ${paths.checkpoints}/11M-old-tokenizer
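Note: the `loader.batch_size`, `loader.eval_batch_size`, and `trainer.accumulate_grad_batches` entries above are not literals; they are computed by the `div_up` and `eval` resolvers that main.py registers (see the main.py diff below). The following standalone sketch of that arithmetic is not part of the commit and uses illustrative trainer values (4 devices, 1 node), assuming an OmegaConf version with nested resolver arguments.

    # Minimal sketch (not in the repo): reproduce the resolver-based
    # batch-size arithmetic from config.yaml with assumed trainer values.
    from omegaconf import OmegaConf

    OmegaConf.register_new_resolver("div_up", lambda x, y: (x + y - 1) // y)
    OmegaConf.register_new_resolver("eval", eval)

    cfg = OmegaConf.create({
        "trainer": {"devices": 4, "num_nodes": 1},  # assumed values
        "loader": {
            "global_batch_size": 64,
            "batch_size": "${div_up:${.global_batch_size}, ${eval:${trainer.devices} * ${trainer.num_nodes}}}",
        },
    })
    print(cfg.loader.batch_size)  # 16 per device: ceil(64 / (4 * 1))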
    	
configs/path.yaml
ADDED

@@ -0,0 +1,7 @@
+paths:
+  base: ${cwd:}             # repo root
+  data: ${paths.base}/data
+  checkpoints: ${paths.base}/checkpoints
+  tokenizers: ${paths.base}/tokenizers
+  outputs: ${paths.base}/outputs
+
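config.yaml pulls this file in through its `defaults: - path` entry, so every `${paths.*}` reference resolves against the `cwd` resolver registered in main.py. A minimal sketch of that resolution, assuming the process is launched from the repository root; it is not part of the commit.

    # Minimal sketch (not in the repo): resolve the paths block on its own.
    import os
    from omegaconf import OmegaConf

    OmegaConf.register_new_resolver("cwd", os.getcwd)

    paths = OmegaConf.create({
        "paths": {
            "base": "${cwd:}",
            "data": "${paths.base}/data",
            "checkpoints": "${paths.base}/checkpoints",
        }
    })
    print(paths.paths.data)  # e.g. <repo-root>/data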
    	
main.py
CHANGED

@@ -16,7 +16,6 @@ import torch
 import sys
 import torch.distributed as dist
 from torch.nn.parallel import DistributedDataParallel as DDP
-sys.path.append("/home/st512/peptune/scripts/peptide-mdlm-mcts")

 import dataset as dataloader
 import dataloading_for_dynamic_batching as dynamic_dataloader
@@ -30,24 +29,25 @@ from tokenizer.my_tokenizers import SMILES_SPE_Tokenizer
 from helm_tokenizer.helm_tokenizer import HelmTokenizer


-#wandb.login(key="5a7613c531cb58f9802f3f8e2f73bc4997b917ab")
-
 omegaconf.OmegaConf.register_new_resolver('cwd', os.getcwd)
 omegaconf.OmegaConf.register_new_resolver('device_count', torch.cuda.device_count)
 omegaconf.OmegaConf.register_new_resolver('eval', eval)
 omegaconf.OmegaConf.register_new_resolver('div_up', lambda x, y: (x + y - 1) // y)
+omegaconf.OmegaConf.register_new_resolver("env_or", lambda k, d: os.getenv(k, d))

 def _load_from_checkpoint(config, tokenizer):
-
-
-
-
-
-
-
-
-
-
+    """Create Diffusion model; load weights if checkpoint_path is set."""
+    if "hf" in str(config.get("backbone", "")):
+        return Diffusion(config, tokenizer=tokenizer).to("cuda")
+
+    ckpt_path = config.eval.checkpoint_path
+    model = Diffusion.load_from_checkpoint(
+        ckpt_path,
+        tokenizer=tokenizer,
+        config=config,
+        map_location="cuda" if torch.cuda.is_available() else "cpu",
+    )
+    return model

 @L.pytorch.utilities.rank_zero_only
 def print_config(
@@ -197,36 +197,37 @@ def _train(config, logger, tokenizer, data_module):

 	model = Diffusion(config, tokenizer=tokenizer)

-	if config.backbone == 'finetune_roformer':
-		checkpoint = torch.load(
-
+	if config.backbone == 'finetune_roformer' and config.eval.checkpoint_path:
+		checkpoint = torch.load(config.eval.checkpoint_path, map_location="cpu")
+		state = checkpoint.get("state_dict", checkpoint)
+		model.load_state_dict(state, strict=False)

 	trainer.fit(model, datamodule=data_module, ckpt_path=ckpt_path)


-@hydra.main(version_base=None, config_path='
+@hydra.main(version_base=None, config_path='configs', config_name='config')
 def main(config):
 	"""
 		Main entry point for training
 	"""
-	wandb.init(project="peptune")
 	L.seed_everything(config.seed)

 	# print_config(config, resolve=True, save_cfg=True)

 	logger = utils.get_logger(__name__)
 	# load PeptideCLM tokenizer
-
+	tok_dir = config.paths.tokenizers
+	if config.vocab == 'new_smiles':
 		tokenizer = APETokenizer()
-		tokenizer.load_vocabulary('/
+		tokenizer.load_vocabulary(f'{tok_dir}/peptide_smiles_600_vocab.json')
 	elif config.vocab == 'old_smiles':
-		tokenizer = SMILES_SPE_Tokenizer('/
-                               '/
+		tokenizer = SMILES_SPE_Tokenizer(f'{tok_dir}/new_vocab.txt',
+                                   f'{tok_dir}/new_splits.txt')
 	elif config.vocab == 'selfies':
 		tokenizer = APETokenizer()
-		tokenizer.load_vocabulary('/
+		tokenizer.load_vocabulary(f'{tok_dir}/peptide_selfies_600_vocab.json')
 	elif config.vocab == 'helm':
-		tokenizer = HelmTokenizer('/
+		tokenizer = HelmTokenizer(f'{tok_dir}/monomer_vocab.txt')

 	if config.backbone == 'finetune_roformer':
 		train_dataset = load_dataset('csv', data_files=config.data.train)
@@ -236,7 +237,7 @@ def main(config):
 		val_dataset = val_dataset['train']#.select(lst)
 		data_module = dataloader.CustomDataModule(train_dataset, val_dataset, None, tokenizer, batch_size=config.loader.global_batch_size)
 	else:
-		data_module = dynamic_dataloader.CustomDataModule('
+		data_module = dynamic_dataloader.CustomDataModule(f'{config.paths.data}/smiles/11M_smiles_old_tokenizer_no_limit', tokenizer)

 	if config.mode == 'sample_eval':
 		generate_samples(config, logger, tokenizer)
@@ -247,4 +248,4 @@ def main(config):


 if __name__ == '__main__':
-	main()
+	main()
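With `@hydra.main` now pointing at the relative `configs` directory, the same configuration can also be composed programmatically. Below is a hedged sketch using Hydra's compose API; it is not part of the commit, assumes the snippet sits next to main.py so the relative `configs` path resolves, and the override values (including pointing `eval.checkpoint_path` at the checkpoint added by this commit) are purely illustrative.

    # Hedged sketch (not in the repo): compose the config main.py consumes and
    # override a couple of fields from Python instead of the command line.
    from hydra import compose, initialize

    with initialize(version_base=None, config_path="configs"):
        cfg = compose(
            config_name="config",
            overrides=[
                "mode=ppl_eval",
                "eval.checkpoint_path=peptune-pretrained.ckpt",
            ],
        )

    print(cfg.mode)                  # ppl_eval
    print(cfg.eval.checkpoint_path)  # peptune-pretrained.ckpt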
    	
peptune-pretrained.ckpt
ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b259f022c21121f5c755fed61230d6fdf2626ee4ab8a23df479b3cf553fd4aef
+size 1386966244
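The checkpoint itself is not stored in the repository; the three lines above are a Git LFS pointer recording the spec version, the SHA-256 of the real file, and its size (1,386,966,244 bytes, roughly 1.4 GB). Fetching the actual weights is done with `git lfs pull`. A small hedged sketch for inspecting such a pointer follows; the helper name and file handling are illustrative, not part of the commit.

    # Hedged sketch (not in the repo): parse a Git LFS pointer file such as
    # peptune-pretrained.ckpt and report its object id and size.
    def read_lfs_pointer(path: str) -> dict:
        fields = {}
        with open(path) as fh:
            for line in fh:
                key, _, value = line.strip().partition(" ")
                fields[key] = value
        return fields

    ptr = read_lfs_pointer("peptune-pretrained.ckpt")
    print(ptr["oid"])                    # sha256:b259f022...
    print(int(ptr["size"]) / 1e9, "GB")  # ~1.39 GB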
