Commit
·
263228f
1
Parent(s):
e370830
Upload folder using huggingface_hub
Browse files- added_tokens.json +1 -0
- artifacts.ckpt +3 -0
- config.json +24 -0
- config.yaml +29 -0
- events.out.tfevents.1694167436.ip-172-31-6-40.2940.0 +3 -0
- events.out.tfevents.1694172390.ip-172-31-6-40.23480.0 +3 -0
- hparams.yaml +1 -0
- pytorch_model.bin +3 -0
- sentencepiece.bpe.model +3 -0
- special_tokens_map.json +1 -0
- tokenizer_config.json +1 -0
added_tokens.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"<sep/>": 57522, "<s_iitcdip>": 57523, "<s_synthdog>": 57524, "</s_menu>": 57525, "<s_menu>": 57526, "</s_nm>": 57527, "<s_nm>": 57528, "</s_cnt>": 57529, "<s_cnt>": 57530, "</s_price>": 57531, "<s_price>": 57532, "</s_sub_total>": 57533, "<s_sub_total>": 57534, "</s_subtotal_price>": 57535, "<s_subtotal_price>": 57536, "</s_service_price>": 57537, "<s_service_price>": 57538, "</s_tax_price>": 57539, "<s_tax_price>": 57540, "</s_etc>": 57541, "<s_etc>": 57542, "</s_total>": 57543, "<s_total>": 57544, "</s_total_price>": 57545, "<s_total_price>": 57546, "</s_sub>": 57547, "<s_sub>": 57548, "</s_cashprice>": 57549, "<s_cashprice>": 57550, "</s_changeprice>": 57551, "<s_changeprice>": 57552, "</s_menutype_cnt>": 57553, "<s_menutype_cnt>": 57554, "</s_menuqty_cnt>": 57555, "<s_menuqty_cnt>": 57556, "</s_discount_price>": 57557, "<s_discount_price>": 57558, "</s_unitprice>": 57559, "<s_unitprice>": 57560, "</s_total_etc>": 57561, "<s_total_etc>": 57562, "</s_creditcardprice>": 57563, "<s_creditcardprice>": 57564, "</s_num>": 57565, "<s_num>": 57566, "</s_discountprice>": 57567, "<s_discountprice>": 57568, "</s_emoneyprice>": 57569, "<s_emoneyprice>": 57570, "</s_void_menu>": 57571, "<s_void_menu>": 57572, "</s_othersvc_price>": 57573, "<s_othersvc_price>": 57574, "</s_vatyn>": 57575, "<s_vatyn>": 57576, "</s_itemsubtotal>": 57577, "<s_itemsubtotal>": 57578, "<s_cord-v2>": 57579, "</s_UID>": 57580, "<s_UID>": 57581, "</s_NAME>": 57582, "<s_NAME>": 57583, "</s_GENDER>": 57584, "<s_GENDER>": 57585, "</s_DOB>": 57586, "<s_DOB>": 57587, "</s_YOP>": 57588, "<s_YOP>": 57589, "</s_SCHOOL>": 57590, "<s_SCHOOL>": 57591, "</s_MARKS>": 57592, "<s_MARKS>": 57593, "</s_BOARD>": 57594, "<s_BOARD>": 57595, "</s_STATUS>": 57596, "<s_STATUS>": 57597, "</s_STATE>": 57598, "<s_STATE>": 57599, "</s_TMARKS>": 57600, "<s_TMARKS>": 57601, "<s_idm_v1>": 57602}
|
artifacts.ckpt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:08b0f4450b365b7571208dc7e11f03b969e212d78f895f209675ada93f9b9160
|
3 |
+
size 1609389955
|
config.json
ADDED
@@ -0,0 +1,24 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "naver-clova-ix/donut-base-finetuned-cord-v2",
|
3 |
+
"align_long_axis": false,
|
4 |
+
"architectures": [
|
5 |
+
"DonutModel"
|
6 |
+
],
|
7 |
+
"decoder_layer": 4,
|
8 |
+
"encoder_layer": [
|
9 |
+
2,
|
10 |
+
2,
|
11 |
+
14,
|
12 |
+
2
|
13 |
+
],
|
14 |
+
"input_size": [
|
15 |
+
1280,
|
16 |
+
960
|
17 |
+
],
|
18 |
+
"max_length": 768,
|
19 |
+
"max_position_embeddings": 768,
|
20 |
+
"model_type": "donut",
|
21 |
+
"torch_dtype": "float32",
|
22 |
+
"transformers_version": "4.11.3",
|
23 |
+
"window_size": 10
|
24 |
+
}
|
config.yaml
ADDED
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
resume_from_checkpoint_path: './result/train_cord/id_marksheet_v3/'
|
2 |
+
result_path: './result'
|
3 |
+
pretrained_model_name_or_path: 'naver-clova-ix/donut-base-finetuned-cord-v2'
|
4 |
+
dataset_name_or_paths:
|
5 |
+
- 'idm_v1'
|
6 |
+
sort_json_key: True
|
7 |
+
train_batch_sizes:
|
8 |
+
- 3
|
9 |
+
val_batch_sizes:
|
10 |
+
- 1
|
11 |
+
input_size:
|
12 |
+
- 1280
|
13 |
+
- 960
|
14 |
+
max_length: 768
|
15 |
+
align_long_axis: False
|
16 |
+
num_nodes: 1
|
17 |
+
seed: 2022
|
18 |
+
lr: 3e-05
|
19 |
+
warmup_steps: 30
|
20 |
+
num_training_samples_per_epoch: 100
|
21 |
+
max_epochs: 30
|
22 |
+
max_steps: -1
|
23 |
+
num_workers: 8
|
24 |
+
val_check_interval: 1.0
|
25 |
+
check_val_every_n_epoch: 3
|
26 |
+
gradient_clip_val: 1.0
|
27 |
+
verbose: True
|
28 |
+
exp_name: 'train_cord'
|
29 |
+
exp_version: 'id_marksheet_v3'
|
events.out.tfevents.1694167436.ip-172-31-6-40.2940.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59a31e1d547fa370fd7ecd35619b6ebc43d33d7f3972b440241faf7243e9d36c
|
3 |
+
size 15171
|
events.out.tfevents.1694172390.ip-172-31-6-40.23480.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d7b3ed6a116ae76fae8362036da8e44220cbe8ef3a654ddf39d657af2d31d0f
|
3 |
+
size 69020
|
hparams.yaml
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{}
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:59a4dc061ec997532fa0999ffc6cf6498f22b9260f0aef9cbe95a2e01699b7e8
|
3 |
+
size 858668935
|
sentencepiece.bpe.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb9e3dce4c326195d08fc3dd0f7e2eee1da8595c847bf4c1a9c78b7a82d47e2d
|
3 |
+
size 1296245
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true}, "additional_special_tokens": ["<s_idm_v1>"]}
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "sp_model_kwargs": {}, "from_slow": true, "name_or_path": "naver-clova-ix/donut-base-finetuned-cord-v2", "processor_class": "DonutProcessor", "special_tokens_map_file": null, "tokenizer_file": "/home/ubuntu/.cache/huggingface/transformers/1bbb0def15649cd8877b8ea9ce1d35c6c0b4a3ef3cae0bd6847f67cec5c0e4d0.74ac633c96bbb15e1d2c8b1a15861957f8056020dedbba9d47111e17f73cc388", "tokenizer_class": "XLMRobertaTokenizer"}
|