huzaifanafees committed
Commit 97fe9c2 · 1 Parent(s): f0e3a09

Upload 4 files

Files changed (4)
  1. config.py +12 -0
  2. load_model.py +27 -0
  3. transformer.py +273 -0
  4. translator.py +45 -0
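Together, the four files form a self-contained inference bundle: config.py holds the shared hyperparameters, transformer.py defines the encoder-decoder model, load_model.py rebuilds that model and restores the trained weights along with the SentencePiece tokenizers, and translator.py wraps everything in a greedy English-to-Urdu decoding loop.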
config.py ADDED
@@ -0,0 +1,12 @@
+ class Config:
+     vocab_size = 28000        # vocabulary size (shared by source and target)
+     sequence_length = 35      # maximum tokenized sequence length
+     batch_size = 128
+     validation_split = 0.20
+     embed_dim = 300           # embedding / model dimension (d_model)
+     num_layers = 4            # number of encoder and decoder layers
+     latent_dim = 2048         # feed-forward width (dff); also used as the positional-encoding length
+     num_heads = 12            # attention heads
+     epochs = 50               # number of epochs to train
+     is_training = True
+ config = Config()
load_model.py ADDED
@@ -0,0 +1,27 @@
+ from transformer import Transformer
+ import tensorflow_text as tf_text
+ import tensorflow as tf
+ from config import config
+
+
+ def load_model(en_emb_matrix, de_emb_matrix, model_path, config):
+     # Rebuild the Transformer with the same hyperparameters used at training time,
+     # then restore the trained weights from `model_path`.
+     model = Transformer(
+         num_layers=config.num_layers,
+         d_model=config.embed_dim,
+         num_heads=config.num_heads,
+         en_embedding_matrix=en_emb_matrix,
+         de_embedding_matrix=de_emb_matrix,
+         dff=config.latent_dim,
+         input_vocab_size=config.vocab_size,
+         target_vocab_size=config.vocab_size,
+         dropout_rate=0.2
+     )
+     model.load_weights(model_path)
+     return model
+
+ def load_sp_model(path_en, path_ur):
+     # SentencePiece tokenizers for English (source) and Urdu (target, reversed token order).
+     sp_model_en = tf_text.SentencepieceTokenizer(model=tf.io.gfile.GFile(path_en, 'rb').read(), add_bos=True, add_eos=True)
+     sp_model_ur = tf_text.SentencepieceTokenizer(model=tf.io.gfile.GFile(path_ur, 'rb').read(), reverse=True, add_bos=True, add_eos=True)
+     return sp_model_en, sp_model_ur
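
For orientation, a minimal sketch of how these two helpers might be called, assuming hypothetical artifact names (a TF checkpoint prefix and two SentencePiece model files) and randomly initialised embedding matrices in place of the pretrained ones; none of these paths are named in this commit:

import numpy as np
from config import config
from load_model import load_model, load_sp_model

# Random stand-ins for the pretrained English/Urdu embedding matrices
# (shape vocab_size x embed_dim, matching the Embedding layers).
en_emb = np.random.normal(size=(config.vocab_size, config.embed_dim)).astype('float32')
de_emb = np.random.normal(size=(config.vocab_size, config.embed_dim)).astype('float32')

# Hypothetical paths; with the TF checkpoint format, load_weights can restore
# weights even before the subclassed model has been built.
model = load_model(en_emb, de_emb, 'checkpoints/transformer_ckpt', config)
sp_model_en, sp_model_ur = load_sp_model('sp_en.model', 'sp_ur.model')
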
transformer.py ADDED
@@ -0,0 +1,273 @@
+ import tensorflow as tf
+ import numpy as np
+ from config import config
+
+ def positional_encoding(length, depth):
+     depth = depth / 2
+
+     positions = np.arange(length)[:, np.newaxis]      # (seq, 1)
+     depths = np.arange(depth)[np.newaxis, :] / depth  # (1, depth)
+
+     angle_rates = 1 / (10000**depths)                 # (1, depth)
+     angle_rads = positions * angle_rates              # (pos, depth)
+
+     pos_encoding = np.concatenate(
+         [np.sin(angle_rads), np.cos(angle_rads)],
+         axis=-1)
+
+     return tf.cast(pos_encoding, dtype=tf.float32)
+
+ class PositionalEmbedding(tf.keras.layers.Layer):
+     def __init__(self, vocab_size, d_model, embedding_matrix):
+         super().__init__()
+         self.d_model = d_model
+         self.embedding = tf.keras.layers.Embedding(vocab_size, d_model,
+                                                    embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
+                                                    mask_zero=True)
+         # config.latent_dim (2048) doubles as the maximum positional-encoding length.
+         self.pos_encoding = positional_encoding(length=config.latent_dim, depth=d_model)
+
+     def compute_mask(self, *args, **kwargs):
+         return self.embedding.compute_mask(*args, **kwargs)
+
+     def call(self, x):
+         length = tf.shape(x)[1]
+         x = self.embedding(x)
+         # This factor sets the relative scale of the embedding and positional_encoding.
+         x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
+         x = x + self.pos_encoding[tf.newaxis, :length, :]
+         return x
+
+ class BaseAttention(tf.keras.layers.Layer):
+     def __init__(self, **kwargs):
+         super().__init__()
+         self.mha = tf.keras.layers.MultiHeadAttention(**kwargs)
+         self.layernorm = tf.keras.layers.LayerNormalization()
+         self.add = tf.keras.layers.Add()
+
+ class CrossAttention(BaseAttention):
+     def call(self, x, context):
+         attn_output, attn_scores = self.mha(
+             query=x,
+             key=context,
+             value=context,
+             return_attention_scores=True)
+
+         # Cache the attention scores for plotting later.
+         self.last_attn_scores = attn_scores
+
+         x = self.add([x, attn_output])
+         x = self.layernorm(x)
+
+         return x
+
+ class GlobalSelfAttention(BaseAttention):
+     def call(self, x):
+         attn_output = self.mha(
+             query=x,
+             value=x,
+             key=x)
+         x = self.add([x, attn_output])
+         x = self.layernorm(x)
+         return x
+
+ class CausalSelfAttention(BaseAttention):
+     def call(self, x):
+         attn_output = self.mha(
+             query=x,
+             value=x,
+             key=x,
+             use_causal_mask=True)
+         x = self.add([x, attn_output])
+         x = self.layernorm(x)
+         return x
+
+ class FeedForward(tf.keras.layers.Layer):
+     def __init__(self, d_model, dff, dropout_rate=0.1):
+         super().__init__()
+         self.seq = tf.keras.Sequential([
+             tf.keras.layers.Dense(dff, activation='relu'),
+             tf.keras.layers.Dense(d_model),
+             tf.keras.layers.Dropout(dropout_rate)
+         ])
+         self.add = tf.keras.layers.Add()
+         self.layer_norm = tf.keras.layers.LayerNormalization()
+
+     def call(self, x):
+         x = self.add([x, self.seq(x)])
+         x = self.layer_norm(x)
+         return x
+
+ class EncoderLayer(tf.keras.layers.Layer):
+     def __init__(self, *, d_model, num_heads, dff, dropout_rate=0.1):
+         super().__init__()
+
+         self.self_attention = GlobalSelfAttention(
+             num_heads=num_heads,
+             key_dim=d_model,
+             dropout=dropout_rate)
+
+         self.ffn = FeedForward(d_model, dff)
+
+     def call(self, x):
+         x = self.self_attention(x)
+         x = self.ffn(x)
+         return x
+
+ class Encoder(tf.keras.layers.Layer):
+     def __init__(self, *, num_layers, d_model, num_heads, embedding_matrix,
+                  dff, vocab_size, dropout_rate=0.1):
+         super().__init__()
+
+         self.d_model = d_model
+         self.num_layers = num_layers
+         self.embedding_matrix = embedding_matrix
+
+         self.pos_embedding = PositionalEmbedding(
+             vocab_size=vocab_size, d_model=d_model, embedding_matrix=embedding_matrix)
+
+         self.enc_layers = [
+             EncoderLayer(d_model=d_model,
+                          num_heads=num_heads,
+                          dff=dff,
+                          dropout_rate=dropout_rate)
+             for _ in range(num_layers)]
+         self.dropout = tf.keras.layers.Dropout(dropout_rate)
+
+     def call(self, x):
+         # `x` is token-IDs shape: (batch, seq_len)
+         x = self.pos_embedding(x)  # Shape `(batch_size, seq_len, d_model)`.
+
+         # Add dropout.
+         x = self.dropout(x)
+
+         for i in range(self.num_layers):
+             x = self.enc_layers[i](x)
+
+         return x  # Shape `(batch_size, seq_len, d_model)`.
+
+ class DecoderLayer(tf.keras.layers.Layer):
+     def __init__(self,
+                  *,
+                  d_model,
+                  num_heads,
+                  dff,
+                  dropout_rate=0.1):
+         super(DecoderLayer, self).__init__()
+
+         self.causal_self_attention = CausalSelfAttention(
+             num_heads=num_heads,
+             key_dim=d_model,
+             dropout=dropout_rate)
+
+         self.cross_attention = CrossAttention(
+             num_heads=num_heads,
+             key_dim=d_model,
+             dropout=dropout_rate)
+
+         self.ffn = FeedForward(d_model, dff)
+
+     def call(self, x, context):
+         x = self.causal_self_attention(x=x)
+         x = self.cross_attention(x=x, context=context)
+
+         # Cache the last attention scores for plotting later.
+         self.last_attn_scores = self.cross_attention.last_attn_scores
+
+         x = self.ffn(x)  # Shape `(batch_size, seq_len, d_model)`.
+         return x
+
+ class Decoder(tf.keras.layers.Layer):
+     def __init__(self, *, num_layers, d_model, num_heads, embedding_matrix, dff, vocab_size,
+                  dropout_rate=0.1):
+         super(Decoder, self).__init__()
+
+         self.d_model = d_model
+         self.num_layers = num_layers
+         self.embedding_matrix = embedding_matrix
+
+         self.pos_embedding = PositionalEmbedding(vocab_size=vocab_size,
+                                                  d_model=d_model, embedding_matrix=embedding_matrix)
+         self.dropout = tf.keras.layers.Dropout(dropout_rate)
+         self.dec_layers = [
+             DecoderLayer(d_model=d_model, num_heads=num_heads,
+                          dff=dff, dropout_rate=dropout_rate)
+             for _ in range(num_layers)]
+
+         self.last_attn_scores = None
+
+     def call(self, x, context):
+         # `x` is token-IDs shape (batch, target_seq_len)
+         x = self.pos_embedding(x)  # (batch_size, target_seq_len, d_model)
+
+         x = self.dropout(x)
+
+         for i in range(self.num_layers):
+             x = self.dec_layers[i](x, context)
+
+         self.last_attn_scores = self.dec_layers[-1].last_attn_scores
+
+         # The shape of x is (batch_size, target_seq_len, d_model).
+         return x
+
+ class Transformer(tf.keras.Model):
+     def __init__(self, *, num_layers, d_model, num_heads, en_embedding_matrix, de_embedding_matrix, dff,
+                  input_vocab_size, target_vocab_size, dropout_rate=0.1):
+         super().__init__()
+         self.encoder = Encoder(num_layers=num_layers, d_model=d_model,
+                                num_heads=num_heads, embedding_matrix=en_embedding_matrix, dff=dff,
+                                vocab_size=input_vocab_size,
+                                dropout_rate=dropout_rate)
+
+         self.decoder = Decoder(num_layers=num_layers, d_model=d_model,
+                                num_heads=num_heads, embedding_matrix=de_embedding_matrix, dff=dff,
+                                vocab_size=target_vocab_size,
+                                dropout_rate=dropout_rate)
+
+         self.final_layer = tf.keras.layers.Dense(target_vocab_size)
+
+     def call(self, inputs):
+         # To use a Keras model with `.fit` you must pass all your inputs in the
+         # first argument.
+         context, x = inputs
+
+         context = self.encoder(context)  # (batch_size, context_len, d_model)
+
+         x = self.decoder(x, context)  # (batch_size, target_len, d_model)
+
+         # Final linear layer output.
+         logits = self.final_layer(x)  # (batch_size, target_len, target_vocab_size)
+
+         try:
+             # Drop the Keras mask so it doesn't scale the losses/metrics.
+             # b/250038731
+             del logits._keras_mask
+         except AttributeError:
+             pass
+
+         # Return the final logits.
+         return logits
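
As a quick sanity check on the architecture (not part of the commit), the sketch below instantiates the Transformer with random embedding matrices and random token IDs and verifies the output shape; the batch size and target length are arbitrary:

import numpy as np
import tensorflow as tf
from config import config
from transformer import Transformer

# Random stand-ins for the pretrained embedding matrices.
en_emb = np.random.normal(size=(config.vocab_size, config.embed_dim)).astype('float32')
de_emb = np.random.normal(size=(config.vocab_size, config.embed_dim)).astype('float32')

model = Transformer(
    num_layers=config.num_layers, d_model=config.embed_dim, num_heads=config.num_heads,
    en_embedding_matrix=en_emb, de_embedding_matrix=de_emb, dff=config.latent_dim,
    input_vocab_size=config.vocab_size, target_vocab_size=config.vocab_size,
    dropout_rate=0.2)

# Dummy (source, target) token-ID batches; IDs start at 1 because 0 is the padding ID.
context = tf.random.uniform((2, config.sequence_length), 1, config.vocab_size, dtype=tf.int32)
target = tf.random.uniform((2, config.sequence_length - 1), 1, config.vocab_size, dtype=tf.int32)

logits = model([context, target])
print(logits.shape)  # (2, 34, 28000) = (batch, target_len, target_vocab_size)
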
translator.py ADDED
@@ -0,0 +1,45 @@
+ import tensorflow as tf
+ import numpy as np
+ from config import config
+
+ class Translator(tf.Module):
+     def __init__(self, sp_model_en, sp_model_ur, transformer):
+         super().__init__()
+         self.sp_model_en = sp_model_en
+         self.sp_model_ur = sp_model_ur
+         self.transformer = transformer
+
+     def __call__(self, sentence, max_length=config.sequence_length):
+         sentence = tf.constant(sentence)
+         if len(sentence.shape) == 0:
+             sentence = sentence[tf.newaxis]
+
+         # Tokenize the English sentence.
+         sentence = self.sp_model_en.tokenize(sentence).to_tensor()
+
+         encoder_input = sentence
+
+         # Tokenizing an empty string yields only the special tokens, which supplies
+         # the start and end IDs used to seed and terminate the Urdu output.
+         start = self.sp_model_ur.tokenize([''])[0][0][tf.newaxis]
+         end = self.sp_model_ur.tokenize([''])[0][1][tf.newaxis]
+
+         output_array = tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True)
+         output_array = output_array.write(0, start)
+
+         # Greedy decoding: feed the tokens generated so far back into the decoder
+         # and append the most likely next token at every step.
+         for i in tf.range(max_length):
+             output = tf.transpose(output_array.stack())
+             predictions = self.transformer([encoder_input, output], training=False)
+
+             predictions = predictions[:, -1:, :]  # Shape `(batch_size, 1, vocab_size)`.
+
+             predicted_id = tf.argmax(predictions, axis=-1)
+             predicted_id = tf.cast(predicted_id, tf.int32)
+
+             output_array = output_array.write(i + 1, predicted_id[0])
+
+             if predicted_id == end:
+                 break
+
+         output = tf.transpose(output_array.stack())
+         text = self.sp_model_ur.detokenize(output)[0]  # Shape: `()`.
+
+         return text.numpy().decode('utf-8')
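
Continuing the loading sketch shown after load_model.py, the Translator ties the pieces together; the example sentence is illustrative only:

from translator import Translator

# `model`, `sp_model_en` and `sp_model_ur` as produced by the loading sketch above.
translator = Translator(sp_model_en, sp_model_ur, model)
print(translator('How are you?'))  # greedy-decoded Urdu output (at most config.sequence_length steps)
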