Spaces:
Sleeping
Sleeping
Zai
committed on
Commit
·
4c2795d
1
Parent(s):
1550002
Make required modules and files
Browse files
- burmese_gpt/config.py +6 -6
- burmese_gpt/data/__init__.py +0 -0
- burmese_gpt/{dataset.py → data/dataset.py} +0 -0
- burmese_gpt/model.py +0 -59
- burmese_gpt/models/__init__.py +0 -0
- burmese_gpt/models/model.py +8 -0
- burmese_gpt/training/__init__.py +0 -0
- requirements.txt +6 -1
- scripts/generate.py +0 -0
- space.py → scripts/space.py +5 -5
- setup.py +1 -0
burmese_gpt/config.py
CHANGED
@@ -5,16 +5,16 @@ class ModelConfig:
|
|
5 |
vocab_size: int = 30000
|
6 |
embed_dim: int = 256
|
7 |
num_heads: int = 8
|
8 |
-
num_layers: int =
|
9 |
dropout: float = 0.1
|
10 |
-
max_seq_len: int =
|
11 |
|
12 |
@dataclass
|
13 |
class TrainingConfig:
|
14 |
batch_size: int = 32
|
15 |
learning_rate: float = 5e-5
|
16 |
-
num_epochs: int =
|
17 |
-
warmup_steps: int = 1000
|
18 |
-
weight_decay: float = 0.01
|
19 |
checkpoint_dir: str = "checkpoints"
|
20 |
-
log_dir: str = "logs"
|
|
|
|
|
|
5 |
vocab_size: int = 30000
|
6 |
embed_dim: int = 256
|
7 |
num_heads: int = 8
|
8 |
+
num_layers: int = 4
|
9 |
dropout: float = 0.1
|
10 |
+
max_seq_len: int = 128
|
11 |
|
12 |
@dataclass
|
13 |
class TrainingConfig:
|
14 |
batch_size: int = 32
|
15 |
learning_rate: float = 5e-5
|
16 |
+
num_epochs: int = 5
|
|
|
|
|
17 |
checkpoint_dir: str = "checkpoints"
|
18 |
+
log_dir: str = "logs"
|
19 |
+
save_every: int = 1
|
20 |
+
eval_every: int = 1
|
burmese_gpt/data/__init__.py
ADDED
File without changes
|
burmese_gpt/{dataset.py → data/dataset.py}
RENAMED
File without changes
|
burmese_gpt/model.py
DELETED
@@ -1,59 +0,0 @@
|
|
1 |
-
import torch
|
2 |
-
from torch import nn
|
3 |
-
from torch.nn import TransformerEncoder, TransformerEncoderLayer
|
4 |
-
from .config import ModelConfig
|
5 |
-
|
6 |
-
class BurmeseGPT(nn.Module):
|
7 |
-
def __init__(self,config:ModelConfig):
|
8 |
-
super(BurmeseGPT, self).__init__()
|
9 |
-
self.config = config
|
10 |
-
# Token and positional embeddings
|
11 |
-
self.token_embedding = nn.Embedding(config.vocab_size, config.embed_dim)
|
12 |
-
self.pos_embedding = nn.Embedding(config.max_seq_len, config.embed_dim)
|
13 |
-
|
14 |
-
# Transformer layers
|
15 |
-
encoder_layers = TransformerEncoderLayer(
|
16 |
-
d_model=config.embed_dim,
|
17 |
-
nhead=config.num_heads,
|
18 |
-
dim_feedforward=4 * config.embed_dim,
|
19 |
-
dropout=config.dropout,
|
20 |
-
batch_first=True
|
21 |
-
)
|
22 |
-
self.transformer = TransformerEncoder(encoder_layers, config.num_layers)
|
23 |
-
|
24 |
-
# Output layer
|
25 |
-
self.ln = nn.LayerNorm(config.embed_dim)
|
26 |
-
self.fc = nn.Linear(config.embed_dim, config.vocab_size)
|
27 |
-
|
28 |
-
# Initialize weights
|
29 |
-
self.apply(self._init_weights)
|
30 |
-
|
31 |
-
def _init_weights(self, module):
|
32 |
-
if isinstance(module, nn.Linear):
|
33 |
-
torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
|
34 |
-
if module.bias is not None:
|
35 |
-
torch.nn.init.zeros_(module.bias)
|
36 |
-
elif isinstance(module, nn.Embedding):
|
37 |
-
torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
|
38 |
-
|
39 |
-
def forward(self, x, attention_mask=None):
|
40 |
-
device = x.device
|
41 |
-
seq_len = x.size(1)
|
42 |
-
|
43 |
-
# Create position ids
|
44 |
-
position_ids = torch.arange(seq_len, dtype=torch.long, device=device).unsqueeze(0)
|
45 |
-
|
46 |
-
# Get embeddings
|
47 |
-
token_embeds = self.token_embedding(x)
|
48 |
-
pos_embeds = self.pos_embedding(position_ids)
|
49 |
-
x = token_embeds + pos_embeds
|
50 |
-
|
51 |
-
# Create causal mask
|
52 |
-
mask = torch.triu(torch.ones(seq_len, seq_len, device=device), diagonal=1).bool()
|
53 |
-
|
54 |
-
# Transformer
|
55 |
-
x = self.transformer(x, mask=mask, src_key_padding_mask=attention_mask)
|
56 |
-
x = self.ln(x)
|
57 |
-
logits = self.fc(x)
|
58 |
-
|
59 |
-
return logits
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
burmese_gpt/models/__init__.py
ADDED
File without changes
|
burmese_gpt/models/model.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from torch import nn
|
2 |
+
from burmese_gpt.config import ModelConfig
|
3 |
+
|
4 |
+
class BurmeseGPT(nn.Module):
|
5 |
+
def __init__(self,config:ModelConfig):
|
6 |
+
super(BurmeseGPT, self).__init__()
|
7 |
+
self.config = config
|
8 |
+
# Continue the rest
|
burmese_gpt/training/__init__.py
ADDED
File without changes
|
requirements.txt
CHANGED
@@ -1 +1,6 @@
|
|
1 |
-
streamlit
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
streamlit>=1.20.0
|
2 |
+
torch>=2.0.0
|
3 |
+
transformers>=4.30.0
|
4 |
+
datasets>=2.12.0
|
5 |
+
tqdm>=4.65.0
|
6 |
+
numpy>=1.24.0
|
scripts/generate.py
ADDED
File without changes
|
space.py → scripts/space.py
RENAMED
@@ -9,7 +9,7 @@ st.set_page_config(
|
|
9 |
|
10 |
# Create a sidebar with a title and a brief description
|
11 |
st.sidebar.title("Burmese GPT")
|
12 |
-
st.sidebar.write("A language
|
13 |
|
14 |
# Create a selectbox to choose the view
|
15 |
view_options = ["Sampling", "Chat Interface"]
|
@@ -18,7 +18,7 @@ selected_view = st.sidebar.selectbox("Select a view:", view_options)
|
|
18 |
# Create a main area
|
19 |
if selected_view == "Sampling":
|
20 |
st.title("Sampling")
|
21 |
-
st.write("Generate text using the pre-trained
|
22 |
|
23 |
# Create a text input field for the prompt
|
24 |
prompt = st.text_input("Prompt:", value="")
|
@@ -37,15 +37,15 @@ if selected_view == "Sampling":
|
|
37 |
|
38 |
elif selected_view == "Chat Interface":
|
39 |
st.title("Chat Interface")
|
40 |
-
st.write("Chat with the fine-tuned
|
41 |
|
42 |
# Create a text input field for the user input
|
43 |
user_input = st.text_input("You:", value="")
|
44 |
|
45 |
-
# Create a button to send the input to the
|
46 |
send_button = st.button("Send")
|
47 |
|
48 |
-
# Create an output area to display the
|
49 |
response_area = st.text_area("Model:", height=200, disabled=True)
|
50 |
|
51 |
# Add some space between the input and output areas
|
|
|
9 |
|
10 |
# Create a sidebar with a title and a brief description
|
11 |
st.sidebar.title("Burmese GPT")
|
12 |
+
st.sidebar.write("A language models app for generating and chatting in Burmese.")
|
13 |
|
14 |
# Create a selectbox to choose the view
|
15 |
view_options = ["Sampling", "Chat Interface"]
|
|
|
18 |
# Create a main area
|
19 |
if selected_view == "Sampling":
|
20 |
st.title("Sampling")
|
21 |
+
st.write("Generate text using the pre-trained models:")
|
22 |
|
23 |
# Create a text input field for the prompt
|
24 |
prompt = st.text_input("Prompt:", value="")
|
|
|
37 |
|
38 |
elif selected_view == "Chat Interface":
|
39 |
st.title("Chat Interface")
|
40 |
+
st.write("Chat with the fine-tuned models:")
|
41 |
|
42 |
# Create a text input field for the user input
|
43 |
user_input = st.text_input("You:", value="")
|
44 |
|
45 |
+
# Create a button to send the input to the models
|
46 |
send_button = st.button("Send")
|
47 |
|
48 |
+
# Create an output area to display the model's response
|
49 |
response_area = st.text_area("Model:", height=200, disabled=True)
|
50 |
|
51 |
# Add some space between the input and output areas
|
setup.py
CHANGED
@@ -2,4 +2,5 @@ from setuptools import setup
|
|
2 |
|
3 |
setup(
|
4 |
name="burmese_gpt",
|
|
|
5 |
)
|
|
|
2 |
|
3 |
setup(
|
4 |
name="burmese_gpt",
|
5 |
+
|
6 |
)
|