Mahdip72 committed on
Commit
90f7981
·
verified ·
1 Parent(s): b9556cb

Upload 3 files

Browse files

feat: protein melting temperature task

protein_melting_temperature/2025-06-05__00-04-55/checkpoints/best_valid_protein_melting_temperature_rmse.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c471bd761e651051e3bcf698374cc046b867ea48fd9d1514b810ba1d8566643b
3
+ size 4042601755
protein_melting_temperature/2025-06-05__00-04-55/config.yaml ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fix_seed: 0
2
+ checkpoints_every: 256
3
+ tensorboard_log: True
4
+ tqdm_progress_bar: False
5
+ result_path: ./results/protein_melting_temperature/
6
+
7
+ resume:
8
+ enable: False
9
+ resume_path: results/auxiliary/2024-12-18__22-08-46/checkpoints/checkpoint_16.pth
10
+ restart_optimizer: True
11
+ restart_steps: True
12
+ restart_decoder_embedding: False
13
+
14
+ prot2token_model:
15
+ compile_model: False
16
+ positional_encoding_type: learned # absolute, learned
17
+ protein_encoder:
18
+ model_type: esm_v2 # esm_v2, t5, esmc
19
+ model_name: facebook/esm2_t33_650M_UR50D # esmc_600m, facebook/esm2_t33_650M_UR50D, facebook/esm2_t30_150M_UR50D, facebook/esm2_t12_35M_UR50D, facebook/esm2_t6_8M_UR50D, Rostlab/prot_t5_base_mt_uniref50
20
+ max_len: 552
21
+ max_label_index: 1024
22
+ drop_positional_encoding: True
23
+ quantization_4_bit: False # use with tune_embedding enabled
24
+ tune_embedding: False # only for esm
25
+ fine_tune:
26
+ enable: True
27
+ last_layers_trainable: 1
28
+ lora:
29
+ enable: False
30
+ r: 8
31
+ lora_alpha: 32
32
+ lora_dropout: 0.05
33
+ decoder:
34
+ latest_flash_attention: False
35
+ dimension: 640
36
+ dim_feedforward: 2560
37
+ num_heads: 16
38
+ num_layers: 16
39
+ max_len: 16
40
+ activation_function: gelu
41
+ decoder_context_dropout: 0.2
42
+ freeze:
43
+ embedding: False
44
+ blocks: False
45
+ head: False
46
+ cross_attention_pos_embeddings: False
47
+ pretrained:
48
+ pretrained: False
49
+ pretrained_path: ../pre-trained decoder/2/decoder.pth
50
+
51
+ train_settings:
52
+ skip: False
53
+ data_path: ../../datasets/Joint_training/
54
+ num_epochs: 48
55
+ start_metric_epoch: 4
56
+ shuffle: True
57
+ loss: crossentropy # crossentropy or focal
58
+ sample_weight: True
59
+ task_weight: False
60
+ mixed_precision: bf16 # no, fp16, bf16, fp8
61
+ device: cuda
62
+ batch_size: 64
63
+ random_masking: 0.0 # 0 to 1.0
64
+ num_workers: 4
65
+ grad_accumulation: 1
66
+ max_task_samples: 150000
67
+
68
+ valid_settings:
69
+ data_path: ../../datasets/Joint_training/
70
+ do_every: 1
71
+ batch_size: 1 # 1 when perplexity = False
72
+ perplexity: False
73
+ device: cuda
74
+ num_workers: 2
75
+
76
+ test_settings:
77
+ enable: True
78
+ data_path: ../../datasets/Joint_training/
79
+ batch_size: 1
80
+ device: cuda
81
+ num_workers: 0
82
+ inference_type: inference_greedy # inference_beam_search, inference_greedy
83
+ beam_search:
84
+ top_k: 1
85
+ beam_width: 3
86
+ temperature: 1.0
87
+ monitoring_metrics:
88
+ protein_melting_temperature: rmse
89
+
90
+ tasks:
91
+ protein_melting_temperature: True
protein_melting_temperature/2025-06-05__00-04-55/decoder_tokenizer.yaml ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .: 14
2
+ '0': 4
3
+ '1': 5
4
+ '2': 6
5
+ '3': 7
6
+ '4': 8
7
+ '5': 9
8
+ '6': 10
9
+ '7': 11
10
+ '8': 12
11
+ '9': 13
12
+ <bos>: 1
13
+ <eos>: 2
14
+ <pad>: 0
15
+ <task_protein_melting_temperature>: 3