ThivyanRR committed
Commit fc95062 · verified · 1 parent: 8dd1c83

Upload folder using huggingface_hub

Files changed (2)
  1. config.json +118 -0
  2. generation_config.json +0 -0
config.json ADDED
@@ -0,0 +1,118 @@
+ {
+   "_name_or_path": "ThivyanRR/indic_seamlessm4t_v2_large",
+   "activation_dropout": 0.0,
+   "activation_function": "relu",
+   "adaptor_dropout": 0.1,
+   "adaptor_kernel_size": 8,
+   "adaptor_stride": 8,
+   "add_adapter": true,
+   "architectures": [
+     "SeamlessM4Tv2Model"
+   ],
+   "attention_dropout": 0.1,
+   "bos_token_id": 2,
+   "char_vocab_size": 10943,
+   "conv_depthwise_kernel_size": 31,
+   "decoder_attention_heads": 16,
+   "decoder_ffn_dim": 8192,
+   "decoder_layerdrop": 0.05,
+   "decoder_layers": 24,
+   "decoder_start_token_id": 3,
+   "dropout": 0.1,
+   "encoder_attention_heads": 16,
+   "encoder_ffn_dim": 8192,
+   "encoder_layerdrop": 0.05,
+   "encoder_layers": 24,
+   "eos_token_id": 3,
+   "feature_projection_input_dim": 160,
+   "hidden_size": 1024,
+   "initializer_range": 0.02,
+   "is_encoder_decoder": true,
+   "lang_embed_dim": 256,
+   "layer_norm_eps": 1e-05,
+   "leaky_relu_slope": 0.1,
+   "left_max_position_embeddings": 64,
+   "max_new_tokens": 256,
+   "max_position_embeddings": 4096,
+   "model_type": "seamless_m4t_v2",
+   "num_adapter_layers": 1,
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "position_embeddings_type": "relative_key",
+   "resblock_dilation_sizes": [
+     [
+       1,
+       3,
+       5
+     ],
+     [
+       1,
+       3,
+       5
+     ],
+     [
+       1,
+       3,
+       5
+     ]
+   ],
+   "resblock_kernel_sizes": [
+     3,
+     7,
+     11
+   ],
+   "right_max_position_embeddings": 8,
+   "sampling_rate": 16000,
+   "scale_embedding": true,
+   "speech_encoder_attention_heads": 16,
+   "speech_encoder_chunk_size": 20000,
+   "speech_encoder_dropout": 0.0,
+   "speech_encoder_hidden_act": "swish",
+   "speech_encoder_intermediate_size": 4096,
+   "speech_encoder_layerdrop": 0.1,
+   "speech_encoder_layers": 24,
+   "speech_encoder_left_chunk_num": 128,
+   "spkr_embed_dim": 256,
+   "t2u_bos_token_id": 0,
+   "t2u_decoder_attention_heads": 16,
+   "t2u_decoder_ffn_dim": 8192,
+   "t2u_decoder_layers": 6,
+   "t2u_encoder_attention_heads": 16,
+   "t2u_encoder_ffn_dim": 8192,
+   "t2u_encoder_layers": 6,
+   "t2u_eos_token_id": 2,
+   "t2u_max_position_embeddings": 4096,
+   "t2u_pad_token_id": 1,
+   "t2u_variance_pred_dropout": 0.5,
+   "t2u_variance_predictor_embed_dim": 1024,
+   "t2u_variance_predictor_hidden_dim": 256,
+   "t2u_variance_predictor_kernel_size": 3,
+   "t2u_vocab_size": 10082,
+   "torch_dtype": "float32",
+   "transformers_version": "4.45.1",
+   "unit_embed_dim": 1280,
+   "unit_hifi_gan_vocab_size": 10000,
+   "upsample_initial_channel": 512,
+   "upsample_kernel_sizes": [
+     11,
+     8,
+     8,
+     4,
+     4
+   ],
+   "upsample_rates": [
+     5,
+     4,
+     4,
+     2,
+     2
+   ],
+   "use_cache": true,
+   "var_pred_dropout": 0.5,
+   "variance_predictor_kernel_size": 3,
+   "vocab_size": 256102,
+   "vocoder_num_langs": 37,
+   "vocoder_num_spkrs": 200,
+   "vocoder_offset": 4
+ }
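
The config describes a full SeamlessM4Tv2Model stack (text encoder/decoder, speech encoder, T2U model, and HiFi-GAN vocoder parameters). As a minimal sketch of consuming it, assuming the ThivyanRR/indic_seamlessm4t_v2_large repo named in "_name_or_path" is public and also contains the weight and tokenizer files this commit does not show:

# Minimal sketch: inspect the uploaded config with Hugging Face transformers.
# The repo ID is taken from "_name_or_path" above; whether the repo holds
# anything beyond config.json/generation_config.json is an assumption here.
from transformers import AutoConfig

config = AutoConfig.from_pretrained("ThivyanRR/indic_seamlessm4t_v2_large")
print(config.model_type)   # seamless_m4t_v2
print(config.vocab_size)   # 256102
print(config.torch_dtype)  # float32, per the config above

# Loading the full model additionally requires the weight files:
# from transformers import SeamlessM4Tv2Model
# model = SeamlessM4Tv2Model.from_pretrained("ThivyanRR/indic_seamlessm4t_v2_large")

Since the file records "transformers_version": "4.45.1", any transformers release with SeamlessM4Tv2 support (v4.37 onward) should be able to parse it.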
generation_config.json ADDED
The diff for this file is too large to render.