Mahdip72
/

prot2token

Model card Files and versions Community

Mahdip72 committed 27 days ago

Commit

90f7981

verified ·

1 Parent(s): b9556cb

Upload 3 files

Browse files

feat: protein melting temperature task

Files changed (3) hide show

protein_melting_temperature/2025-06-05__00-04-55/checkpoints/best_valid_protein_melting_temperature_rmse.pth +3 -0
protein_melting_temperature/2025-06-05__00-04-55/config.yaml +91 -0
protein_melting_temperature/2025-06-05__00-04-55/decoder_tokenizer.yaml +15 -0

protein_melting_temperature/2025-06-05__00-04-55/checkpoints/best_valid_protein_melting_temperature_rmse.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c471bd761e651051e3bcf698374cc046b867ea48fd9d1514b810ba1d8566643b
+size 4042601755

protein_melting_temperature/2025-06-05__00-04-55/config.yaml ADDED Viewed

	@@ -0,0 +1,91 @@

+fix_seed: 0
+checkpoints_every: 256
+tensorboard_log: True
+tqdm_progress_bar: False
+result_path: ./results/protein_melting_temperature/
+resume:
+  enable: False
+  resume_path: results/auxiliary/2024-12-18__22-08-46/checkpoints/checkpoint_16.pth
+  restart_optimizer: True
+  restart_steps: True
+  restart_decoder_embedding: False
+prot2token_model:
+  compile_model: False
+  positional_encoding_type: learned # absolute, learned
+  protein_encoder:
+    model_type: esm_v2 # esm_v2, t5, esmc
+    model_name:  facebook/esm2_t33_650M_UR50D # esmc_600m, facebook/esm2_t33_650M_UR50D, facebook/esm2_t30_150M_UR50D, facebook/esm2_t12_35M_UR50D, facebook/esm2_t6_8M_UR50D, Rostlab/prot_t5_base_mt_uniref50
+    max_len: 552
+    max_label_index: 1024
+    drop_positional_encoding: True
+    quantization_4_bit: False # use with tune_embedding enable
+    tune_embedding: False # only for esm
+    fine_tune:
+      enable: True
+      last_layers_trainable: 1
+    lora:
+      enable: False
+      r: 8
+      lora_alpha: 32
+      lora_dropout: 0.05
+  decoder:
+    latest_flash_attention: False
+    dimension: 640
+    dim_feedforward: 2560
+    num_heads: 16
+    num_layers: 16
+    max_len: 16
+    activation_function: gelu
+    decoder_context_dropout: 0.2
+    freeze:
+      embedding: False
+      blocks: False
+      head: False
+      cross_attention_pos_embeddings: False
+    pretrained:
+      pretrained: False
+      pretrained_path: ../pre-trained decoder/2/decoder.pth
+train_settings:
+  skip: False
+  data_path: ../../datasets/Joint_training/
+  num_epochs: 48
+  start_metric_epoch: 4
+  shuffle: True
+  loss: crossentropy # crossentropy or focal
+  sample_weight: True
+  task_weight: False
+  mixed_precision: bf16 # no, fp16, bf16, fp8
+  device: cuda
+  batch_size: 64
+  random_masking: 0.0 # 0 to 1.0
+  num_workers: 4
+  grad_accumulation: 1
+  max_task_samples: 150000
+valid_settings:
+  data_path: ../../datasets/Joint_training/
+  do_every: 1
+  batch_size: 1 # 1 when perplexity = False
+  perplexity: False
+  device: cuda
+  num_workers: 2
+test_settings:
+  enable: True
+  data_path: ../../datasets/Joint_training/
+  batch_size: 1
+  device: cuda
+  num_workers: 0
+  inference_type: inference_greedy # inference_beam_search, inference_greedy
+  beam_search:
+    top_k: 1
+    beam_width: 3
+    temperature: 1.0
+  monitoring_metrics:
+    protein_melting_temperature: rmse
+tasks:
+  protein_melting_temperature: True

protein_melting_temperature/2025-06-05__00-04-55/decoder_tokenizer.yaml ADDED Viewed

	@@ -0,0 +1,15 @@

+.: 14
+'0': 4
+'1': 5
+'2': 6
+'3': 7
+'4': 8
+'5': 9
+'6': 10
+'7': 11
+'8': 12
+'9': 13
+<bos>: 1
+<eos>: 2
+<pad>: 0
+<task_protein_melting_temperature>: 3