Commit 97fe9c2 · Parent(s): f0e3a09
Upload 4 files

Files changed:
- config.py +12 -0
- load_model.py +27 -0
- transformer.py +273 -0
- translator.py +45 -0
config.py
ADDED
@@ -0,0 +1,12 @@
class Config:
    vocab_size = 28000        # vocabulary size
    sequence_length = 35
    batch_size = 128
    validation_split = 0.20
    embed_dim = 300
    num_layers = 4
    latent_dim = 2048
    num_heads = 12
    epochs = 50               # number of epochs to train
    is_training = True

config = Config()
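Note: the values above are consumed by the other files in this commit; in particular, transformer.py expects pretrained embedding matrices shaped (vocab_size, embed_dim). A minimal sanity-check sketch (the random matrices here are placeholders for illustration, not the embeddings the Space actually uses):

import numpy as np
from config import config

# Placeholder embedding matrices with the shape the Transformer expects:
# one row per vocabulary entry, one column per embedding dimension.
en_emb_matrix = np.random.uniform(
    -0.05, 0.05, (config.vocab_size, config.embed_dim)).astype("float32")
de_emb_matrix = np.random.uniform(
    -0.05, 0.05, (config.vocab_size, config.embed_dim)).astype("float32")

assert en_emb_matrix.shape == (28000, 300)
assert config.embed_dim % 2 == 0  # positional_encoding splits the depth in half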
load_model.py
ADDED
@@ -0,0 +1,27 @@
from transformer import Transformer
import tensorflow_text as tf_text
import tensorflow as tf
from config import config


def load_model(en_emb_matrix, de_emb_matrix, model_path, config):
    # Rebuild the Transformer with the same hyperparameters it was trained with,
    # then restore the trained weights from `model_path`.
    model = Transformer(
        num_layers=config.num_layers,
        d_model=config.embed_dim,
        num_heads=config.num_heads,
        en_embedding_matrix=en_emb_matrix,
        de_embedding_matrix=de_emb_matrix,
        dff=config.latent_dim,
        input_vocab_size=config.vocab_size,
        target_vocab_size=config.vocab_size,
        dropout_rate=0.2
    )
    model.load_weights(model_path)
    return model


def load_sp_model(path_en, path_ur):
    # SentencePiece tokenizers: the English tokenizer adds BOS/EOS markers; the Urdu
    # tokenizer additionally reverses the token order, matching the training setup.
    sp_model_en = tf_text.SentencepieceTokenizer(
        model=tf.io.gfile.GFile(path_en, 'rb').read(),
        add_bos=True, add_eos=True)
    sp_model_ur = tf_text.SentencepieceTokenizer(
        model=tf.io.gfile.GFile(path_ur, 'rb').read(),
        reverse=True, add_bos=True, add_eos=True)
    return sp_model_en, sp_model_ur
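Note: a minimal sketch of how these loaders are used. The paths below are placeholders (this commit does not define them); real SentencePiece model files are needed for the calls to run.

from load_model import load_sp_model

# Placeholder paths -- substitute the SentencePiece models shipped with the Space.
sp_model_en, sp_model_ur = load_sp_model("en_sp.model", "ur_sp.model")

tokens = sp_model_en.tokenize(["How are you?"])   # ragged int32 IDs, with BOS/EOS added
text = sp_model_en.detokenize(tokens)             # round-trips back to the input sentence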
transformer.py
ADDED
@@ -0,0 +1,273 @@
import tensorflow as tf
import numpy as np
from config import config

def positional_encoding(length, depth):
    depth = depth / 2

    positions = np.arange(length)[:, np.newaxis]      # (seq, 1)
    depths = np.arange(depth)[np.newaxis, :] / depth  # (1, depth)

    angle_rates = 1 / (10000**depths)                 # (1, depth)
    angle_rads = positions * angle_rates              # (pos, depth)

    pos_encoding = np.concatenate(
        [np.sin(angle_rads), np.cos(angle_rads)],
        axis=-1)

    return tf.cast(pos_encoding, dtype=tf.float32)

class PositionalEmbedding(tf.keras.layers.Layer):
    def __init__(self, vocab_size, d_model, embedding_matrix):
        super().__init__()
        self.d_model = d_model
        self.embedding = tf.keras.layers.Embedding(
            vocab_size, d_model,
            embeddings_initializer=tf.keras.initializers.Constant(embedding_matrix),
            mask_zero=True)
        # config.latent_dim (2048) is used here as the maximum number of positions.
        self.pos_encoding = positional_encoding(length=config.latent_dim, depth=d_model)

    def compute_mask(self, *args, **kwargs):
        return self.embedding.compute_mask(*args, **kwargs)

    def call(self, x):
        length = tf.shape(x)[1]
        x = self.embedding(x)
        # This factor sets the relative scale of the embedding and positional_encoding.
        x *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        x = x + self.pos_encoding[tf.newaxis, :length, :]
        return x

class BaseAttention(tf.keras.layers.Layer):
    def __init__(self, **kwargs):
        super().__init__()
        self.mha = tf.keras.layers.MultiHeadAttention(**kwargs)
        self.layernorm = tf.keras.layers.LayerNormalization()
        self.add = tf.keras.layers.Add()

class CrossAttention(BaseAttention):
    def call(self, x, context):
        attn_output, attn_scores = self.mha(
            query=x,
            key=context,
            value=context,
            return_attention_scores=True)

        # Cache the attention scores for plotting later.
        self.last_attn_scores = attn_scores

        x = self.add([x, attn_output])
        x = self.layernorm(x)

        return x

class GlobalSelfAttention(BaseAttention):
    def call(self, x):
        attn_output = self.mha(
            query=x,
            value=x,
            key=x)
        x = self.add([x, attn_output])
        x = self.layernorm(x)
        return x

class CausalSelfAttention(BaseAttention):
    def call(self, x):
        attn_output = self.mha(
            query=x,
            value=x,
            key=x,
            use_causal_mask=True)
        x = self.add([x, attn_output])
        x = self.layernorm(x)
        return x

class FeedForward(tf.keras.layers.Layer):
    def __init__(self, d_model, dff, dropout_rate=0.1):
        super().__init__()
        self.seq = tf.keras.Sequential([
            tf.keras.layers.Dense(dff, activation='relu'),
            tf.keras.layers.Dense(d_model),
            tf.keras.layers.Dropout(dropout_rate)
        ])
        self.add = tf.keras.layers.Add()
        self.layer_norm = tf.keras.layers.LayerNormalization()

    def call(self, x):
        x = self.add([x, self.seq(x)])
        x = self.layer_norm(x)
        return x

class EncoderLayer(tf.keras.layers.Layer):
    def __init__(self, *, d_model, num_heads, dff, dropout_rate=0.1):
        super().__init__()

        self.self_attention = GlobalSelfAttention(
            num_heads=num_heads,
            key_dim=d_model,
            dropout=dropout_rate)

        self.ffn = FeedForward(d_model, dff)

    def call(self, x):
        x = self.self_attention(x)
        x = self.ffn(x)
        return x

class Encoder(tf.keras.layers.Layer):
    def __init__(self, *, num_layers, d_model, num_heads, embedding_matrix,
                 dff, vocab_size, dropout_rate=0.1):
        super().__init__()

        self.d_model = d_model
        self.num_layers = num_layers
        self.embedding_matrix = embedding_matrix

        self.pos_embedding = PositionalEmbedding(
            vocab_size=vocab_size, d_model=d_model, embedding_matrix=embedding_matrix)

        self.enc_layers = [
            EncoderLayer(d_model=d_model,
                         num_heads=num_heads,
                         dff=dff,
                         dropout_rate=dropout_rate)
            for _ in range(num_layers)]
        self.dropout = tf.keras.layers.Dropout(dropout_rate)

    def call(self, x):
        # `x` is token IDs, shape (batch, seq_len).
        x = self.pos_embedding(x)  # Shape `(batch_size, seq_len, d_model)`.

        # Add dropout.
        x = self.dropout(x)

        for i in range(self.num_layers):
            x = self.enc_layers[i](x)

        return x  # Shape `(batch_size, seq_len, d_model)`.

class DecoderLayer(tf.keras.layers.Layer):
    def __init__(self,
                 *,
                 d_model,
                 num_heads,
                 dff,
                 dropout_rate=0.1):
        super(DecoderLayer, self).__init__()

        self.causal_self_attention = CausalSelfAttention(
            num_heads=num_heads,
            key_dim=d_model,
            dropout=dropout_rate)

        self.cross_attention = CrossAttention(
            num_heads=num_heads,
            key_dim=d_model,
            dropout=dropout_rate)

        self.ffn = FeedForward(d_model, dff)

    def call(self, x, context):
        x = self.causal_self_attention(x=x)
        x = self.cross_attention(x=x, context=context)

        # Cache the last attention scores for plotting later.
        self.last_attn_scores = self.cross_attention.last_attn_scores

        x = self.ffn(x)  # Shape `(batch_size, seq_len, d_model)`.
        return x

class Decoder(tf.keras.layers.Layer):
    def __init__(self, *, num_layers, d_model, num_heads, embedding_matrix, dff,
                 vocab_size, dropout_rate=0.1):
        super(Decoder, self).__init__()

        self.d_model = d_model
        self.num_layers = num_layers
        self.embedding_matrix = embedding_matrix

        self.pos_embedding = PositionalEmbedding(
            vocab_size=vocab_size, d_model=d_model, embedding_matrix=embedding_matrix)
        self.dropout = tf.keras.layers.Dropout(dropout_rate)
        self.dec_layers = [
            DecoderLayer(d_model=d_model, num_heads=num_heads,
                         dff=dff, dropout_rate=dropout_rate)
            for _ in range(num_layers)]

        self.last_attn_scores = None

    def call(self, x, context):
        # `x` is token IDs, shape (batch, target_seq_len).
        x = self.pos_embedding(x)  # (batch_size, target_seq_len, d_model)

        x = self.dropout(x)

        for i in range(self.num_layers):
            x = self.dec_layers[i](x, context)

        self.last_attn_scores = self.dec_layers[-1].last_attn_scores

        # The shape of x is (batch_size, target_seq_len, d_model).
        return x

class Transformer(tf.keras.Model):
    def __init__(self, *, num_layers, d_model, num_heads, en_embedding_matrix,
                 de_embedding_matrix, dff, input_vocab_size, target_vocab_size,
                 dropout_rate=0.1):
        super().__init__()
        self.encoder = Encoder(num_layers=num_layers, d_model=d_model,
                               num_heads=num_heads, embedding_matrix=en_embedding_matrix,
                               dff=dff, vocab_size=input_vocab_size,
                               dropout_rate=dropout_rate)

        self.decoder = Decoder(num_layers=num_layers, d_model=d_model,
                               num_heads=num_heads, embedding_matrix=de_embedding_matrix,
                               dff=dff, vocab_size=target_vocab_size,
                               dropout_rate=dropout_rate)

        self.final_layer = tf.keras.layers.Dense(target_vocab_size)

    def call(self, inputs):
        # To use a Keras model with `.fit` you must pass all your inputs in the
        # first argument.
        context, x = inputs

        context = self.encoder(context)  # (batch_size, context_len, d_model)

        x = self.decoder(x, context)     # (batch_size, target_len, d_model)

        # Final linear layer output.
        logits = self.final_layer(x)     # (batch_size, target_len, target_vocab_size)

        try:
            # Drop the Keras mask so it doesn't scale the losses/metrics.
            # b/250038731
            del logits._keras_mask
        except AttributeError:
            pass

        # Return the final output.
        return logits
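Note: a quick shape check for the model above, using toy dimensions and random embedding matrices; these numbers are illustrative and deliberately smaller than those in config.py.

import numpy as np
import tensorflow as tf
from transformer import Transformer

vocab, d_model = 1000, 64  # toy sizes for a fast check
emb = np.random.uniform(-0.05, 0.05, (vocab, d_model)).astype("float32")

model = Transformer(num_layers=2, d_model=d_model, num_heads=4,
                    en_embedding_matrix=emb, de_embedding_matrix=emb,
                    dff=128, input_vocab_size=vocab, target_vocab_size=vocab)

src = tf.random.uniform((2, 10), minval=1, maxval=vocab, dtype=tf.int64)  # (batch, src_len)
tgt = tf.random.uniform((2, 7), minval=1, maxval=vocab, dtype=tf.int64)   # (batch, tgt_len)

logits = model([src, tgt])
print(logits.shape)  # (2, 7, 1000): one logit per target position per vocabulary entry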
translator.py
ADDED
@@ -0,0 +1,45 @@
import tensorflow as tf
import numpy as np
from config import config

class Translator(tf.Module):
    def __init__(self, sp_model_en, sp_model_ur, transformer):
        self.sp_model_en = sp_model_en
        self.sp_model_ur = sp_model_ur
        self.transformer = transformer

    def __call__(self, sentence, max_length=config.sequence_length):
        sentence = tf.constant(sentence)
        if len(sentence.shape) == 0:
            sentence = sentence[tf.newaxis]

        # Tokenize the English sentence.
        sentence = self.sp_model_en.tokenize(sentence).to_tensor()

        encoder_input = sentence

        # Tokenizing an empty string yields only the special markers; the first is
        # used as the decoding start token and the second as the end token. The
        # Urdu output is initialized with the start token.
        start = self.sp_model_ur.tokenize([''])[0][0][tf.newaxis]
        end = self.sp_model_ur.tokenize([''])[0][1][tf.newaxis]

        output_array = tf.TensorArray(dtype=tf.int32, size=0, dynamic_size=True)
        output_array = output_array.write(0, start)

        for i in tf.range(max_length):
            output = tf.transpose(output_array.stack())
            predictions = self.transformer([encoder_input, output], training=False)

            # Select the logits for the last predicted position.
            predictions = predictions[:, -1:, :]  # Shape `(batch_size, 1, vocab_size)`.

            predicted_id = tf.argmax(predictions, axis=-1)
            predicted_id = tf.cast(predicted_id, tf.int32)

            output_array = output_array.write(i + 1, predicted_id[0])

            if predicted_id == end:
                break

        output = tf.transpose(output_array.stack())
        text = self.sp_model_ur.detokenize(output)[0]  # Shape: `()`.

        return text.numpy().decode('utf-8')
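Note: for reference, a minimal end-to-end sketch of how the four files fit together. The artifact paths and zero-filled embedding matrices are placeholders, not files defined in this commit; with the real artifacts, the call returns the Urdu translation as a Python string.

import numpy as np
from config import config
from load_model import load_model, load_sp_model
from translator import Translator

# Placeholder artifact paths -- replace with the Space's real tokenizer models and weights.
sp_en, sp_ur = load_sp_model("en_sp.model", "ur_sp.model")

en_emb = np.zeros((config.vocab_size, config.embed_dim), dtype="float32")
de_emb = np.zeros((config.vocab_size, config.embed_dim), dtype="float32")
model = load_model(en_emb, de_emb, "transformer_weights", config)

translator = Translator(sp_en, sp_ur, model)
print(translator("How are you?"))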