Spaces:

zaibutcooler
/

burmese-gpt

Sleeping

App Files Files Community

Zai committed on Nov 4, 2024

Commit

bd78e12

1 Parent(s): 6feb6a4

Reset project for cleaner approach

Browse files

Files changed (23) hide show

.github/workflows/hugging-face.yaml +0 -42
.idea/.gitignore +8 -0
.idea/burmese-gpt.iml +8 -0
.idea/inspectionProfiles/profiles_settings.xml +6 -0
.idea/misc.xml +7 -0
.idea/modules.xml +8 -0
.idea/vcs.xml +6 -0
README.md +1 -2
burmese_gpt/__init__.py +0 -0
burmese_gpt/config.py +20 -0
burmese_gpt/model.py +7 -0
burmesegpt/__init__.py +0 -1
burmesegpt/config.py +0 -9
burmesegpt/core.py +0 -46
burmesegpt/data_prep.py +0 -19
burmesegpt/models.py +0 -24
burmesegpt/tokenizer.py +0 -18
burmesegpt/utils.py +0 -7
sample.py +0 -10
setup.py +5 -0
tests/test_dataloader.py +0 -9
tests/test_tokenizer.py +0 -15
training.py +0 -7

.github/workflows/hugging-face.yaml DELETED Viewed

@@ -1,42 +0,0 @@
-name: Uploading on Huggingface
-on:
-  push:
-    branches: [main]
-  workflow_dispatch:
-jobs:
-  sync-to-hub:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          fetch-depth: 0
-          lfs: true
-      - name: Set Git identity
-        run: |
-          git config --global user.email "[email protected]"
-          git config --global user.name "GitHub Actions"
-      - name: Update README.md
-        run: |
-          tmp_file=$(mktemp)
-          echo "---" >> $tmp_file
-          echo "title: Burmese GPT" >> $tmp_file
-          echo "emoji: 💫️" >> $tmp_file
-          echo "colorFrom: yellow" >> $tmp_file
-          echo "colorTo: blue" >> $tmp_file
-          echo "sdk: streamlit" >> $tmp_file
-          echo "sdk_version: 1.29.0" >> $tmp_file
-          echo "app_file: space.py" >> $tmp_file
-          echo "pinned: false" >> $tmp_file
-          echo "license: openrail" >> $tmp_file
-          echo "---" >> $tmp_file
-          echo "" >> $tmp_file
-          cat README.md >> $tmp_file
-          mv $tmp_file README.md
-          git add README.md
-          git commit -m "Updated README.md"
-      - name: Push to hub
-        env:
-          HF_TOKEN: ${{ secrets.HF_TOKEN }}
-        run: git push https://zaibutcooler:[email protected]/spaces/zaibutcooler/burmese-gpt --force main

.idea/.gitignore ADDED Viewed

	@@ -0,0 +1,8 @@

+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml

.idea/burmese-gpt.iml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="jdk" jdkName="/opt/anaconda3" jdkType="Python SDK" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>

.idea/inspectionProfiles/profiles_settings.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>

.idea/misc.xml ADDED Viewed

	@@ -0,0 +1,7 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="Black">
+    <option name="sdkName" value="/opt/anaconda3" />
+  </component>
+  <component name="ProjectRootManager" version="2" project-jdk-name="/opt/anaconda3" project-jdk-type="Python SDK" />
+</project>

.idea/modules.xml ADDED Viewed

	@@ -0,0 +1,8 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/burmese-gpt.iml" filepath="$PROJECT_DIR$/.idea/burmese-gpt.iml" />
+    </modules>
+  </component>
+</project>

.idea/vcs.xml ADDED Viewed

	@@ -0,0 +1,6 @@

+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>

README.md CHANGED Viewed

	@@ -1,2 +1 @@
1	-
2	- # Burmese GPT


1	+ # Burmese GPT V2

burmese_gpt/__init__.py ADDED Viewed

File without changes

burmese_gpt/config.py ADDED Viewed

	@@ -0,0 +1,20 @@

+from dataclasses import dataclass
+@dataclass
+class ModelConfig:
+    vocab_size: int = 30000
+    embed_dim: int = 256
+    num_heads: int = 8
+    num_layers: int = 6
+    dropout: float = 0.1
+    max_seq_len: int = 512
+@dataclass
+class TrainingConfig:
+    batch_size: int = 32
+    learning_rate: float = 5e-5
+    num_epochs: int = 10
+    warmup_steps: int = 1000
+    weight_decay: float = 0.01
+    checkpoint_dir: str = "checkpoints"
+    log_dir: str = "logs"

burmese_gpt/model.py ADDED Viewed

	@@ -0,0 +1,7 @@

+import torch
+import torch.nn as nn
+class BurmeseGPT(nn.Module):
+    def __init__(self):
+        super(BurmeseGPT, self).__init__()
+        pass

burmesegpt/__init__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- from .core import BurmeseGpt

burmesegpt/config.py DELETED Viewed

@@ -1,9 +0,0 @@
-import torch
-class Config:
-    def __init__(self) -> None:
-        pass
-config = Config()
-device = 'cuda' if torch.cuda.is_available() else 'cpu'

burmesegpt/core.py DELETED Viewed

@@ -1,46 +0,0 @@
-import torch
-from huggingface_hub import login
-from .models import SelfAttention,MLP,GPT
-from .tokenizer import Tokenizer
-from .config import Config
-from .data_prep import Data
-device = 'cuda' if torch.cuda.is_available() else 'cpu'
-tokenizer = Tokenizer()
-class BurmeseGpt:
-    def __init__(self,tk=tokenizer):
-        self.attention = SelfAttention().to(device)
-        self.mlp = MLP().to(device)
-        self.config = Config().to(device)
-        self.train_data = Data().to(device)
-        self.tokenizer = tk
-    def save_pretrained(self, name="burmese-gpt"):
-        print("Uploading model...")
-        self.model.save_pretrained(name)
-        print(f"Model saved locally as '{name}'")
-        self.model.push_to_hub(name)
-        print(f"Model '{name}' uploaded to the Hugging Face Model Hub")
-    def load_pretrained(self, model_id="zaibutcooler/burmese-gpt"):
-        print("Loading model...")
-        model = model.from_pretrained(model_id)
-        print(f"Model '{model_id}' loaded successfully")
-        return model
-    def huggingface_login(self,token):
-        login(token)
-    def pretrain(self):
-        pass
-    def fine_tune(self):
-        pass
-    def generate(self,entry=''):
-        result = None
-        return result

burmesegpt/data_prep.py DELETED Viewed

@@ -1,19 +0,0 @@
-# to preps data
-from datasets import load_dataset
-from torch.utils.data import Dataset
-class Data(Dataset):
-    def __init__(self):
-        super().__init__()
-        self.texts = None
-        self.data
-    def __len__(self):
-        return None
-    def __getitem__(self, index):
-        return self.trainset[index]

burmesegpt/models.py DELETED Viewed

@@ -1,24 +0,0 @@
-import torch
-from torch import nn
-from huggingface_hub import PyTorchModelHubMixin
-class SelfAttention(nn.Module,PyTorchModelHubMixin):
-    def __init__(self):
-        pass
-    def forward(self):
-        pass
-class MLP(nn.Module,PyTorchModelHubMixin):
-    def __init__(self):
-        pass
-    def forward(self):
-        pass
-class GPT(nn.Module,PyTorchModelHubMixin):
-    def __init__(self):
-        super().__init__()
-    def forward(self):
-        pass

burmesegpt/tokenizer.py DELETED Viewed

@@ -1,18 +0,0 @@
-class Tokenizer:
-    def __init__(self):
-        pass
-    def train(self,text,num_worker,verbose=False):
-        pass
-    def encoder(self,text):
-        pass
-    def decoder(self,ids):
-        pass
-    def save(self):
-        pass
-    def load(self):
-        pass

burmesegpt/utils.py DELETED Viewed

@@ -1,7 +0,0 @@
-# utils
-def get_stats():
-    pass
-def merge():
-    pass

sample.py DELETED Viewed

@@ -1,10 +0,0 @@
-# sample the texts
-from burmesegpt import BurmeseGpt
-gpt = BurmeseGpt()
-model_name = ''
-gpt.load_pretrained(model_name)
-gpt.sample()

setup.py ADDED Viewed

	@@ -0,0 +1,5 @@

+from setuptools import  setup
+setup(
+    name="burmese_gpt",
+)

tests/test_dataloader.py DELETED Viewed

@@ -1,9 +0,0 @@
-from ..burmesegpt import Data
-def check_loader():
-    pass
-def check_output_dim():
-    pass

tests/test_tokenizer.py DELETED Viewed

@@ -1,15 +0,0 @@
-import unittest
-import torch
-class TestTokenizer(unittest.TestCase):
-    def test_downloading(self):
-        url = ""
-        pass
-    def test_loading(self):
-        pass
-if __name__ == "__main__":
-    unittest.main()

training.py DELETED Viewed

@@ -1,7 +0,0 @@
-from burmesegpt import BurmeseGpt
-gpt = BurmeseGpt()
-gpt.train()
-out_dir = 'out'