KhairulAmirinUM committed on
Commit
95aef6a
·
1 Parent(s): 3fe739b
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .env
.idea/workspace.xml CHANGED
@@ -46,6 +46,7 @@
46
  "Python.cth.executor": "Run",
47
  "Python.hf (1).executor": "Run",
48
  "Python.hf.executor": "Run",
 
49
  "Python.main (1).executor": "Run",
50
  "Python.main.executor": "Run",
51
  "RunOnceActivity.ShowReadmeOnStart": "true",
@@ -107,9 +108,32 @@
107
  <option name="INPUT_FILE" value="" />
108
  <method v="2" />
109
  </configuration>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  <recent_temporary>
111
  <list>
112
  <item itemvalue="Python.hf" />
 
113
  <item itemvalue="Python.cth" />
114
  </list>
115
  </recent_temporary>
 
46
  "Python.cth.executor": "Run",
47
  "Python.hf (1).executor": "Run",
48
  "Python.hf.executor": "Run",
49
+ "Python.hf2.executor": "Run",
50
  "Python.main (1).executor": "Run",
51
  "Python.main.executor": "Run",
52
  "RunOnceActivity.ShowReadmeOnStart": "true",
 
108
  <option name="INPUT_FILE" value="" />
109
  <method v="2" />
110
  </configuration>
111
+ <configuration name="hf2" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
112
+ <module name="backend" />
113
+ <option name="ENV_FILES" value="" />
114
+ <option name="INTERPRETER_OPTIONS" value="" />
115
+ <option name="PARENT_ENVS" value="true" />
116
+ <envs>
117
+ <env name="PYTHONUNBUFFERED" value="1" />
118
+ </envs>
119
+ <option name="SDK_HOME" value="" />
120
+ <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/src" />
121
+ <option name="IS_MODULE_SDK" value="true" />
122
+ <option name="ADD_CONTENT_ROOTS" value="true" />
123
+ <option name="ADD_SOURCE_ROOTS" value="true" />
124
+ <option name="SCRIPT_NAME" value="$PROJECT_DIR$/src/hf2.py" />
125
+ <option name="PARAMETERS" value="" />
126
+ <option name="SHOW_COMMAND_LINE" value="false" />
127
+ <option name="EMULATE_TERMINAL" value="false" />
128
+ <option name="MODULE_MODE" value="false" />
129
+ <option name="REDIRECT_INPUT" value="false" />
130
+ <option name="INPUT_FILE" value="" />
131
+ <method v="2" />
132
+ </configuration>
133
  <recent_temporary>
134
  <list>
135
  <item itemvalue="Python.hf" />
136
+ <item itemvalue="Python.hf2" />
137
  <item itemvalue="Python.cth" />
138
  </list>
139
  </recent_temporary>
src/hf.py CHANGED
@@ -2,7 +2,7 @@ from transformers import BertTokenizer, BertForSequenceClassification,TextClassi
2
  # Load tokenizer and model from the fine-tuned directory
3
  model_path = 'intent_classification/TinyBERT_106_V2' # can try other checkpoints
4
 
5
- tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
6
  # model = BertForSequenceClassification.from_pretrained(model_path)
7
  model = AutoModelForSequenceClassification.from_pretrained(model_path)
8
 
 
2
  # Load tokenizer and model from the fine-tuned directory
3
  model_path = 'intent_classification/TinyBERT_106_V2' # can try other checkpoints
4
 
5
+ tokenizer = BertTokenizer.from_pretrained(model_path)
6
  # model = BertForSequenceClassification.from_pretrained(model_path)
7
  model = AutoModelForSequenceClassification.from_pretrained(model_path)
8
 
src/hf2.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from transformers import BertTokenizer, BertForSequenceClassification,TextClassificationPipeline, AutoModelForSequenceClassification
2
+ # Load tokenizer and model from the fine-tuned directory
3
+ model_path = 'intent_classification/TinyBERT_106_V2' # can try other checkpoints
4
+
5
+ tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
6
+ tokenizer.save_pretrained(model_path)
src/intent_classification/TinyBERT_106_V2/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
src/intent_classification/TinyBERT_106_V2/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_basic_tokenize": true,
47
+ "do_lower_case": true,
48
+ "extra_special_tokens": {},
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 512,
51
+ "never_split": null,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "strip_accents": null,
55
+ "tokenize_chinese_chars": true,
56
+ "tokenizer_class": "BertTokenizer",
57
+ "unk_token": "[UNK]"
58
+ }
src/intent_classification/TinyBERT_106_V2/vocab.txt ADDED
The diff for this file is too large to render. See raw diff