Zai committed on
Commit
bd78e12
·
1 Parent(s): 6feb6a4

Reset project for cleaner approach

Browse files
.github/workflows/hugging-face.yaml DELETED
@@ -1,42 +0,0 @@
1
- name: Uploading on Huggingface
2
- on:
3
- push:
4
- branches: [main]
5
- workflow_dispatch:
6
-
7
- jobs:
8
- sync-to-hub:
9
- runs-on: ubuntu-latest
10
- steps:
11
- - uses: actions/checkout@v3
12
- with:
13
- fetch-depth: 0
14
- lfs: true
15
- - name: Set Git identity
16
- run: |
17
- git config --global user.email "[email protected]"
18
- git config --global user.name "GitHub Actions"
19
-
20
- - name: Update README.md
21
- run: |
22
- tmp_file=$(mktemp)
23
- echo "---" >> $tmp_file
24
- echo "title: Burmese GPT" >> $tmp_file
25
- echo "emoji: 💫️" >> $tmp_file
26
- echo "colorFrom: yellow" >> $tmp_file
27
- echo "colorTo: blue" >> $tmp_file
28
- echo "sdk: streamlit" >> $tmp_file
29
- echo "sdk_version: 1.29.0" >> $tmp_file
30
- echo "app_file: space.py" >> $tmp_file
31
- echo "pinned: false" >> $tmp_file
32
- echo "license: openrail" >> $tmp_file
33
- echo "---" >> $tmp_file
34
- echo "" >> $tmp_file
35
- cat README.md >> $tmp_file
36
- mv $tmp_file README.md
37
- git add README.md
38
- git commit -m "Updated README.md"
39
- - name: Push to hub
40
- env:
41
- HF_TOKEN: ${{ secrets.HF_TOKEN }}
42
- run: git push https://zaibutcooler:[email protected]/spaces/zaibutcooler/burmese-gpt --force main
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
.idea/burmese-gpt.iml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <module type="PYTHON_MODULE" version="4">
3
+ <component name="NewModuleRootManager">
4
+ <content url="file://$MODULE_DIR$" />
5
+ <orderEntry type="jdk" jdkName="/opt/anaconda3" jdkType="Python SDK" />
6
+ <orderEntry type="sourceFolder" forTests="false" />
7
+ </component>
8
+ </module>
.idea/inspectionProfiles/profiles_settings.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <component name="InspectionProjectProfileManager">
2
+ <settings>
3
+ <option name="USE_PROJECT_PROFILE" value="false" />
4
+ <version value="1.0" />
5
+ </settings>
6
+ </component>
.idea/misc.xml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="Black">
4
+ <option name="sdkName" value="/opt/anaconda3" />
5
+ </component>
6
+ <component name="ProjectRootManager" version="2" project-jdk-name="/opt/anaconda3" project-jdk-type="Python SDK" />
7
+ </project>
.idea/modules.xml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="ProjectModuleManager">
4
+ <modules>
5
+ <module fileurl="file://$PROJECT_DIR$/.idea/burmese-gpt.iml" filepath="$PROJECT_DIR$/.idea/burmese-gpt.iml" />
6
+ </modules>
7
+ </component>
8
+ </project>
.idea/vcs.xml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <project version="4">
3
+ <component name="VcsDirectoryMappings">
4
+ <mapping directory="" vcs="Git" />
5
+ </component>
6
+ </project>
README.md CHANGED
@@ -1,2 +1 @@
1
-
2
- # Burmese GPT
 
1
+ # Burmese GPT V2
 
burmese_gpt/__init__.py ADDED
File without changes
burmese_gpt/config.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+
3
+ @dataclass
4
+ class ModelConfig:
5
+ vocab_size: int = 30000
6
+ embed_dim: int = 256
7
+ num_heads: int = 8
8
+ num_layers: int = 6
9
+ dropout: float = 0.1
10
+ max_seq_len: int = 512
11
+
12
+ @dataclass
13
+ class TrainingConfig:
14
+ batch_size: int = 32
15
+ learning_rate: float = 5e-5
16
+ num_epochs: int = 10
17
+ warmup_steps: int = 1000
18
+ weight_decay: float = 0.01
19
+ checkpoint_dir: str = "checkpoints"
20
+ log_dir: str = "logs"
burmese_gpt/model.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+ class BurmeseGPT(nn.Module):
5
+ def __init__(self):
6
+ super(BurmeseGPT, self).__init__()
7
+ pass
burmesegpt/__init__.py DELETED
@@ -1 +0,0 @@
1
- from .core import BurmeseGpt
 
 
burmesegpt/config.py DELETED
@@ -1,9 +0,0 @@
1
- import torch
2
-
3
- class Config:
4
- def __init__(self) -> None:
5
- pass
6
-
7
- config = Config()
8
-
9
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
 
 
 
 
 
 
 
 
 
burmesegpt/core.py DELETED
@@ -1,46 +0,0 @@
1
- import torch
2
- from huggingface_hub import login
3
-
4
- from .models import SelfAttention,MLP,GPT
5
- from .tokenizer import Tokenizer
6
- from .config import Config
7
- from .data_prep import Data
8
-
9
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
10
-
11
- tokenizer = Tokenizer()
12
-
13
- class BurmeseGpt:
14
- def __init__(self,tk=tokenizer):
15
- self.attention = SelfAttention().to(device)
16
- self.mlp = MLP().to(device)
17
- self.config = Config().to(device)
18
- self.train_data = Data().to(device)
19
- self.tokenizer = tk
20
-
21
- def save_pretrained(self, name="burmese-gpt"):
22
- print("Uploading model...")
23
- self.model.save_pretrained(name)
24
- print(f"Model saved locally as '{name}'")
25
- self.model.push_to_hub(name)
26
- print(f"Model '{name}' uploaded to the Hugging Face Model Hub")
27
-
28
- def load_pretrained(self, model_id="zaibutcooler/burmese-gpt"):
29
- print("Loading model...")
30
- model = model.from_pretrained(model_id)
31
- print(f"Model '{model_id}' loaded successfully")
32
- return model
33
-
34
- def huggingface_login(self,token):
35
- login(token)
36
-
37
- def pretrain(self):
38
- pass
39
-
40
- def fine_tune(self):
41
- pass
42
-
43
- def generate(self,entry=''):
44
- result = None
45
-
46
- return result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
burmesegpt/data_prep.py DELETED
@@ -1,19 +0,0 @@
1
- # to preps data
2
- from datasets import load_dataset
3
- from torch.utils.data import Dataset
4
-
5
-
6
- class Data(Dataset):
7
- def __init__(self):
8
- super().__init__()
9
- self.texts = None
10
- self.data
11
-
12
- def __len__(self):
13
- return None
14
-
15
- def __getitem__(self, index):
16
- return self.trainset[index]
17
-
18
-
19
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
burmesegpt/models.py DELETED
@@ -1,24 +0,0 @@
1
- import torch
2
- from torch import nn
3
- from huggingface_hub import PyTorchModelHubMixin
4
-
5
- class SelfAttention(nn.Module,PyTorchModelHubMixin):
6
- def __init__(self):
7
- pass
8
-
9
- def forward(self):
10
- pass
11
-
12
- class MLP(nn.Module,PyTorchModelHubMixin):
13
- def __init__(self):
14
- pass
15
-
16
- def forward(self):
17
- pass
18
-
19
- class GPT(nn.Module,PyTorchModelHubMixin):
20
- def __init__(self):
21
- super().__init__()
22
-
23
- def forward(self):
24
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
burmesegpt/tokenizer.py DELETED
@@ -1,18 +0,0 @@
1
- class Tokenizer:
2
- def __init__(self):
3
- pass
4
-
5
- def train(self,text,num_worker,verbose=False):
6
- pass
7
-
8
- def encoder(self,text):
9
- pass
10
-
11
- def decoder(self,ids):
12
- pass
13
-
14
- def save(self):
15
- pass
16
-
17
- def load(self):
18
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
burmesegpt/utils.py DELETED
@@ -1,7 +0,0 @@
1
- # utils
2
-
3
- def get_stats():
4
- pass
5
-
6
- def merge():
7
- pass
 
 
 
 
 
 
 
 
sample.py DELETED
@@ -1,10 +0,0 @@
1
- # sample the texts
2
- from burmesegpt import BurmeseGpt
3
-
4
- gpt = BurmeseGpt()
5
-
6
- model_name = ''
7
-
8
- gpt.load_pretrained(model_name)
9
-
10
- gpt.sample()
 
 
 
 
 
 
 
 
 
 
 
setup.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ from setuptools import setup
2
+
3
+ setup(
4
+ name="burmese_gpt",
5
+ )
tests/test_dataloader.py DELETED
@@ -1,9 +0,0 @@
1
- from ..burmesegpt import Data
2
-
3
-
4
- def check_loader():
5
- pass
6
-
7
-
8
- def check_output_dim():
9
- pass
 
 
 
 
 
 
 
 
 
 
tests/test_tokenizer.py DELETED
@@ -1,15 +0,0 @@
1
- import unittest
2
- import torch
3
-
4
-
5
- class TestTokenizer(unittest.TestCase):
6
- def test_downloading(self):
7
- url = ""
8
- pass
9
-
10
- def test_loading(self):
11
- pass
12
-
13
-
14
- if __name__ == "__main__":
15
- unittest.main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
training.py DELETED
@@ -1,7 +0,0 @@
1
- from burmesegpt import BurmeseGpt
2
-
3
- gpt = BurmeseGpt()
4
-
5
- gpt.train()
6
-
7
- out_dir = 'out'