Zai committed on
Commit
4c2795d
·
1 Parent(s): 1550002

Make required modules and files

Browse files
burmese_gpt/config.py CHANGED
@@ -5,16 +5,16 @@ class ModelConfig:
5
  vocab_size: int = 30000
6
  embed_dim: int = 256
7
  num_heads: int = 8
8
- num_layers: int = 6
9
  dropout: float = 0.1
10
- max_seq_len: int = 512
11
 
12
  @dataclass
13
  class TrainingConfig:
14
  batch_size: int = 32
15
  learning_rate: float = 5e-5
16
- num_epochs: int = 10
17
- warmup_steps: int = 1000
18
- weight_decay: float = 0.01
19
  checkpoint_dir: str = "checkpoints"
20
- log_dir: str = "logs"
 
 
 
5
  vocab_size: int = 30000
6
  embed_dim: int = 256
7
  num_heads: int = 8
8
+ num_layers: int = 4
9
  dropout: float = 0.1
10
+ max_seq_len: int = 128
11
 
12
  @dataclass
13
  class TrainingConfig:
14
  batch_size: int = 32
15
  learning_rate: float = 5e-5
16
+ num_epochs: int = 5
 
 
17
  checkpoint_dir: str = "checkpoints"
18
+ log_dir: str = "logs"
19
+ save_every: int = 1
20
+ eval_every: int = 1
burmese_gpt/data/__init__.py ADDED
File without changes
burmese_gpt/{dataset.py → data/dataset.py} RENAMED
File without changes
burmese_gpt/model.py DELETED
@@ -1,59 +0,0 @@
1
- import torch
2
- from torch import nn
3
- from torch.nn import TransformerEncoder, TransformerEncoderLayer
4
- from .config import ModelConfig
5
-
6
- class BurmeseGPT(nn.Module):
7
- def __init__(self,config:ModelConfig):
8
- super(BurmeseGPT, self).__init__()
9
- self.config = config
10
- # Token and positional embeddings
11
- self.token_embedding = nn.Embedding(config.vocab_size, config.embed_dim)
12
- self.pos_embedding = nn.Embedding(config.max_seq_len, config.embed_dim)
13
-
14
- # Transformer layers
15
- encoder_layers = TransformerEncoderLayer(
16
- d_model=config.embed_dim,
17
- nhead=config.num_heads,
18
- dim_feedforward=4 * config.embed_dim,
19
- dropout=config.dropout,
20
- batch_first=True
21
- )
22
- self.transformer = TransformerEncoder(encoder_layers, config.num_layers)
23
-
24
- # Output layer
25
- self.ln = nn.LayerNorm(config.embed_dim)
26
- self.fc = nn.Linear(config.embed_dim, config.vocab_size)
27
-
28
- # Initialize weights
29
- self.apply(self._init_weights)
30
-
31
- def _init_weights(self, module):
32
- if isinstance(module, nn.Linear):
33
- torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
34
- if module.bias is not None:
35
- torch.nn.init.zeros_(module.bias)
36
- elif isinstance(module, nn.Embedding):
37
- torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
38
-
39
- def forward(self, x, attention_mask=None):
40
- device = x.device
41
- seq_len = x.size(1)
42
-
43
- # Create position ids
44
- position_ids = torch.arange(seq_len, dtype=torch.long, device=device).unsqueeze(0)
45
-
46
- # Get embeddings
47
- token_embeds = self.token_embedding(x)
48
- pos_embeds = self.pos_embedding(position_ids)
49
- x = token_embeds + pos_embeds
50
-
51
- # Create causal mask
52
- mask = torch.triu(torch.ones(seq_len, seq_len, device=device), diagonal=1).bool()
53
-
54
- # Transformer
55
- x = self.transformer(x, mask=mask, src_key_padding_mask=attention_mask)
56
- x = self.ln(x)
57
- logits = self.fc(x)
58
-
59
- return logits
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
burmese_gpt/models/__init__.py ADDED
File without changes
burmese_gpt/models/model.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from torch import nn
2
+ from burmese_gpt.config import ModelConfig
3
+
4
+ class BurmeseGPT(nn.Module):
5
+ def __init__(self,config:ModelConfig):
6
+ super(BurmeseGPT, self).__init__()
7
+ self.config = config
8
+ # Continue the rest
burmese_gpt/training/__init__.py ADDED
File without changes
requirements.txt CHANGED
@@ -1 +1,6 @@
1
- streamlit
 
 
 
 
 
 
1
+ streamlit>=1.20.0
2
+ torch>=2.0.0
3
+ transformers>=4.30.0
4
+ datasets>=2.12.0
5
+ tqdm>=4.65.0
6
+ numpy>=1.24.0
scripts/generate.py ADDED
File without changes
space.py → scripts/space.py RENAMED
@@ -9,7 +9,7 @@ st.set_page_config(
9
 
10
  # Create a sidebar with a title and a brief description
11
  st.sidebar.title("Burmese GPT")
12
- st.sidebar.write("A language model app for generating and chatting in Burmese.")
13
 
14
  # Create a selectbox to choose the view
15
  view_options = ["Sampling", "Chat Interface"]
@@ -18,7 +18,7 @@ selected_view = st.sidebar.selectbox("Select a view:", view_options)
18
  # Create a main area
19
  if selected_view == "Sampling":
20
  st.title("Sampling")
21
- st.write("Generate text using the pre-trained model:")
22
 
23
  # Create a text input field for the prompt
24
  prompt = st.text_input("Prompt:", value="")
@@ -37,15 +37,15 @@ if selected_view == "Sampling":
37
 
38
  elif selected_view == "Chat Interface":
39
  st.title("Chat Interface")
40
- st.write("Chat with the fine-tuned model:")
41
 
42
  # Create a text input field for the user input
43
  user_input = st.text_input("You:", value="")
44
 
45
- # Create a button to send the input to the model
46
  send_button = st.button("Send")
47
 
48
- # Create an output area to display the model's response
49
  response_area = st.text_area("Model:", height=200, disabled=True)
50
 
51
  # Add some space between the input and output areas
 
9
 
10
  # Create a sidebar with a title and a brief description
11
  st.sidebar.title("Burmese GPT")
12
+ st.sidebar.write("A language models app for generating and chatting in Burmese.")
13
 
14
  # Create a selectbox to choose the view
15
  view_options = ["Sampling", "Chat Interface"]
 
18
  # Create a main area
19
  if selected_view == "Sampling":
20
  st.title("Sampling")
21
+ st.write("Generate text using the pre-trained models:")
22
 
23
  # Create a text input field for the prompt
24
  prompt = st.text_input("Prompt:", value="")
 
37
 
38
  elif selected_view == "Chat Interface":
39
  st.title("Chat Interface")
40
+ st.write("Chat with the fine-tuned models:")
41
 
42
  # Create a text input field for the user input
43
  user_input = st.text_input("You:", value="")
44
 
45
+ # Create a button to send the input to the models
46
  send_button = st.button("Send")
47
 
48
+ # Create an output area to display the model's response
49
  response_area = st.text_area("Model:", height=200, disabled=True)
50
 
51
  # Add some space between the input and output areas
setup.py CHANGED
@@ -2,4 +2,5 @@ from setuptools import setup
2
 
3
  setup(
4
  name="burmese_gpt",
 
5
  )
 
2
 
3
  setup(
4
  name="burmese_gpt",
5
+
6
  )