Upload folder using huggingface_hub
Browse files- examples/ytb_asr_batch3_ms_en/data-00000-of-00001.arrow +3 -0
- examples/ytb_asr_batch3_ms_en/dataset_info.json +46 -0
- examples/ytb_asr_batch3_ms_en/state.json +13 -0
- examples/ytb_asr_batch3_ta_en/data-00000-of-00001.arrow +3 -0
- examples/ytb_asr_batch3_ta_en/dataset_info.json +46 -0
- examples/ytb_asr_batch3_ta_en/state.json +13 -0
- examples/ytb_asr_batch3_zh_en/data-00000-of-00001.arrow +3 -0
- examples/ytb_asr_batch3_zh_en/dataset_info.json +68 -0
- examples/ytb_asr_batch3_zh_en/state.json +13 -0
- examples/ytb_sds_batch3_chinese/data-00000-of-00001.arrow +3 -0
- examples/ytb_sds_batch3_chinese/dataset_info.json +0 -22
- examples/ytb_sds_batch3_chinese/state.json +8 -2
- examples/ytb_sds_batch3_malay/data-00000-of-00001.arrow +3 -0
- examples/ytb_sds_batch3_malay/dataset_info.json +0 -22
- examples/ytb_sds_batch3_malay/state.json +1 -1
- examples/ytb_sds_batch3_ms_en/data-00000-of-00001.arrow +3 -0
- examples/ytb_sds_batch3_ms_en/dataset_info.json +46 -0
- examples/ytb_sds_batch3_ms_en/state.json +13 -0
- examples/ytb_sds_batch3_tamil/data-00000-of-00001.arrow +3 -0
- examples/ytb_sds_batch3_tamil/dataset_info.json +0 -22
- examples/ytb_sds_batch3_tamil/state.json +1 -1
- examples/ytb_sds_batch3_zh_en/data-00000-of-00001.arrow +3 -0
- examples/ytb_sds_batch3_zh_en/dataset_info.json +46 -0
- examples/ytb_sds_batch3_zh_en/state.json +13 -0
- examples/ytb_sqa_batch3_malay/data-00000-of-00001.arrow +3 -0
- examples/ytb_sqa_batch3_malay/dataset_info.json +0 -22
- examples/ytb_sqa_batch3_malay/state.json +1 -1
- examples/ytb_sqa_batch3_ms_en/data-00000-of-00001.arrow +3 -0
- examples/ytb_sqa_batch3_ms_en/dataset_info.json +46 -0
- examples/ytb_sqa_batch3_ms_en/state.json +13 -0
- examples/ytb_sqa_batch3_tamil/data-00000-of-00001.arrow +3 -0
- examples/ytb_sqa_batch3_tamil/dataset_info.json +0 -22
- examples/ytb_sqa_batch3_tamil/state.json +1 -1
- examples/ytb_sqa_batch3_zh_en/data-00000-of-00001.arrow +3 -0
- examples/ytb_sqa_batch3_zh_en/dataset_info.json +46 -0
- examples/ytb_sqa_batch3_zh_en/state.json +13 -0
- examples/{ds_name}/data-00000-of-00001.arrow +3 -0
- examples/{ds_name}/dataset_info.json +46 -0
- examples/{ds_name}/state.json +19 -0
examples/ytb_asr_batch3_ms_en/data-00000-of-00001.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:208c237357205ca335366be040f3b0942a2b74c9e02fc8a1f6cc129a753af1ad
|
3 |
+
size 456232
|
examples/ytb_asr_batch3_ms_en/dataset_info.json
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"citation": "",
|
3 |
+
"description": "",
|
4 |
+
"features": {
|
5 |
+
"context": {
|
6 |
+
"text": {
|
7 |
+
"dtype": "string",
|
8 |
+
"_type": "Value"
|
9 |
+
},
|
10 |
+
"audio": {
|
11 |
+
"sampling_rate": 16000,
|
12 |
+
"_type": "Audio"
|
13 |
+
}
|
14 |
+
},
|
15 |
+
"instruction": {
|
16 |
+
"text": {
|
17 |
+
"dtype": "string",
|
18 |
+
"_type": "Value"
|
19 |
+
},
|
20 |
+
"audio": {
|
21 |
+
"sampling_rate": 16000,
|
22 |
+
"_type": "Audio"
|
23 |
+
}
|
24 |
+
},
|
25 |
+
"answer": {
|
26 |
+
"text": {
|
27 |
+
"dtype": "string",
|
28 |
+
"_type": "Value"
|
29 |
+
},
|
30 |
+
"audio": {
|
31 |
+
"sampling_rate": 16000,
|
32 |
+
"_type": "Audio"
|
33 |
+
}
|
34 |
+
},
|
35 |
+
"language": {
|
36 |
+
"dtype": "string",
|
37 |
+
"_type": "Value"
|
38 |
+
},
|
39 |
+
"audio_length": {
|
40 |
+
"dtype": "float64",
|
41 |
+
"_type": "Value"
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"homepage": "",
|
45 |
+
"license": ""
|
46 |
+
}
|
examples/ytb_asr_batch3_ms_en/state.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_data_files": [
|
3 |
+
{
|
4 |
+
"filename": "data-00000-of-00001.arrow"
|
5 |
+
}
|
6 |
+
],
|
7 |
+
"_fingerprint": "9477ebec80490c46",
|
8 |
+
"_format_columns": null,
|
9 |
+
"_format_kwargs": {},
|
10 |
+
"_format_type": null,
|
11 |
+
"_output_all_columns": false,
|
12 |
+
"_split": null
|
13 |
+
}
|
examples/ytb_asr_batch3_ta_en/data-00000-of-00001.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:69bbb5cb413cf1f1f6644b16f9472f903f13ee8e355d34e00242238cbfa49247
|
3 |
+
size 1381736
|
examples/ytb_asr_batch3_ta_en/dataset_info.json
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"citation": "",
|
3 |
+
"description": "",
|
4 |
+
"features": {
|
5 |
+
"context": {
|
6 |
+
"text": {
|
7 |
+
"dtype": "string",
|
8 |
+
"_type": "Value"
|
9 |
+
},
|
10 |
+
"audio": {
|
11 |
+
"sampling_rate": 16000,
|
12 |
+
"_type": "Audio"
|
13 |
+
}
|
14 |
+
},
|
15 |
+
"instruction": {
|
16 |
+
"text": {
|
17 |
+
"dtype": "string",
|
18 |
+
"_type": "Value"
|
19 |
+
},
|
20 |
+
"audio": {
|
21 |
+
"sampling_rate": 16000,
|
22 |
+
"_type": "Audio"
|
23 |
+
}
|
24 |
+
},
|
25 |
+
"answer": {
|
26 |
+
"text": {
|
27 |
+
"dtype": "string",
|
28 |
+
"_type": "Value"
|
29 |
+
},
|
30 |
+
"audio": {
|
31 |
+
"sampling_rate": 16000,
|
32 |
+
"_type": "Audio"
|
33 |
+
}
|
34 |
+
},
|
35 |
+
"language": {
|
36 |
+
"dtype": "string",
|
37 |
+
"_type": "Value"
|
38 |
+
},
|
39 |
+
"audio_length": {
|
40 |
+
"dtype": "float64",
|
41 |
+
"_type": "Value"
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"homepage": "",
|
45 |
+
"license": ""
|
46 |
+
}
|
examples/ytb_asr_batch3_ta_en/state.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_data_files": [
|
3 |
+
{
|
4 |
+
"filename": "data-00000-of-00001.arrow"
|
5 |
+
}
|
6 |
+
],
|
7 |
+
"_fingerprint": "91fc9969182cd36e",
|
8 |
+
"_format_columns": null,
|
9 |
+
"_format_kwargs": {},
|
10 |
+
"_format_type": null,
|
11 |
+
"_output_all_columns": false,
|
12 |
+
"_split": null
|
13 |
+
}
|
examples/ytb_asr_batch3_zh_en/data-00000-of-00001.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18a952214ba1c5c6519ce77b49aada2d250650fd70cc1270e812b62ad1cc6c4d
|
3 |
+
size 387600
|
examples/ytb_asr_batch3_zh_en/dataset_info.json
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"citation": "",
|
3 |
+
"description": "",
|
4 |
+
"features": {
|
5 |
+
"context": {
|
6 |
+
"text": {
|
7 |
+
"dtype": "string",
|
8 |
+
"_type": "Value"
|
9 |
+
},
|
10 |
+
"audio": {
|
11 |
+
"sampling_rate": 16000,
|
12 |
+
"_type": "Audio"
|
13 |
+
}
|
14 |
+
},
|
15 |
+
"instruction": {
|
16 |
+
"text": {
|
17 |
+
"dtype": "string",
|
18 |
+
"_type": "Value"
|
19 |
+
},
|
20 |
+
"audio": {
|
21 |
+
"sampling_rate": 16000,
|
22 |
+
"_type": "Audio"
|
23 |
+
}
|
24 |
+
},
|
25 |
+
"answer": {
|
26 |
+
"text": {
|
27 |
+
"dtype": "string",
|
28 |
+
"_type": "Value"
|
29 |
+
},
|
30 |
+
"audio": {
|
31 |
+
"sampling_rate": 16000,
|
32 |
+
"_type": "Audio"
|
33 |
+
}
|
34 |
+
},
|
35 |
+
"language": {
|
36 |
+
"dtype": "string",
|
37 |
+
"_type": "Value"
|
38 |
+
},
|
39 |
+
"audio_length": {
|
40 |
+
"dtype": "float64",
|
41 |
+
"_type": "Value"
|
42 |
+
},
|
43 |
+
"other_attributes": {
|
44 |
+
"youtube_name": {
|
45 |
+
"dtype": "string",
|
46 |
+
"_type": "Value"
|
47 |
+
},
|
48 |
+
"start_time_in_sec": {
|
49 |
+
"dtype": "float32",
|
50 |
+
"_type": "Value"
|
51 |
+
},
|
52 |
+
"end_time_in_sec": {
|
53 |
+
"dtype": "float32",
|
54 |
+
"_type": "Value"
|
55 |
+
},
|
56 |
+
"link": {
|
57 |
+
"dtype": "string",
|
58 |
+
"_type": "Value"
|
59 |
+
},
|
60 |
+
"percentage_of_chinese": {
|
61 |
+
"dtype": "string",
|
62 |
+
"_type": "Value"
|
63 |
+
}
|
64 |
+
}
|
65 |
+
},
|
66 |
+
"homepage": "",
|
67 |
+
"license": ""
|
68 |
+
}
|
examples/ytb_asr_batch3_zh_en/state.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_data_files": [
|
3 |
+
{
|
4 |
+
"filename": "data-00000-of-00001.arrow"
|
5 |
+
}
|
6 |
+
],
|
7 |
+
"_fingerprint": "0aa769aa06bf3bf6",
|
8 |
+
"_format_columns": null,
|
9 |
+
"_format_kwargs": {},
|
10 |
+
"_format_type": null,
|
11 |
+
"_output_all_columns": false,
|
12 |
+
"_split": null
|
13 |
+
}
|
examples/ytb_sds_batch3_chinese/data-00000-of-00001.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5dfc9d9b3cb2c52535641e6e080aae7608bdea7793a75b0d14cf5e3c2cdebf26
|
3 |
+
size 305136
|
examples/ytb_sds_batch3_chinese/dataset_info.json
CHANGED
@@ -39,28 +39,6 @@
|
|
39 |
"audio_length": {
|
40 |
"dtype": "float64",
|
41 |
"_type": "Value"
|
42 |
-
},
|
43 |
-
"other_attributes": {
|
44 |
-
"youtube_name": {
|
45 |
-
"dtype": "string",
|
46 |
-
"_type": "Value"
|
47 |
-
},
|
48 |
-
"start_time_in_sec": {
|
49 |
-
"dtype": "float32",
|
50 |
-
"_type": "Value"
|
51 |
-
},
|
52 |
-
"end_time_in_sec": {
|
53 |
-
"dtype": "float32",
|
54 |
-
"_type": "Value"
|
55 |
-
},
|
56 |
-
"link": {
|
57 |
-
"dtype": "string",
|
58 |
-
"_type": "Value"
|
59 |
-
},
|
60 |
-
"percentage_of_chinese": {
|
61 |
-
"dtype": "string",
|
62 |
-
"_type": "Value"
|
63 |
-
}
|
64 |
}
|
65 |
},
|
66 |
"homepage": "",
|
|
|
39 |
"audio_length": {
|
40 |
"dtype": "float64",
|
41 |
"_type": "Value"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
}
|
43 |
},
|
44 |
"homepage": "",
|
examples/ytb_sds_batch3_chinese/state.json
CHANGED
@@ -4,8 +4,14 @@
|
|
4 |
"filename": "data-00000-of-00001.arrow"
|
5 |
}
|
6 |
],
|
7 |
-
"_fingerprint": "
|
8 |
-
"_format_columns":
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
"_format_kwargs": {},
|
10 |
"_format_type": null,
|
11 |
"_output_all_columns": false,
|
|
|
4 |
"filename": "data-00000-of-00001.arrow"
|
5 |
}
|
6 |
],
|
7 |
+
"_fingerprint": "4e7730924a13bda6",
|
8 |
+
"_format_columns": [
|
9 |
+
"answer",
|
10 |
+
"audio_length",
|
11 |
+
"context",
|
12 |
+
"instruction",
|
13 |
+
"language"
|
14 |
+
],
|
15 |
"_format_kwargs": {},
|
16 |
"_format_type": null,
|
17 |
"_output_all_columns": false,
|
examples/ytb_sds_batch3_malay/data-00000-of-00001.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44db0547d89fc3b1eda39d05d1d2cc69975e65815b9a9e5bb68937f6b54b3808
|
3 |
+
size 423552
|
examples/ytb_sds_batch3_malay/dataset_info.json
CHANGED
@@ -39,28 +39,6 @@
|
|
39 |
"audio_length": {
|
40 |
"dtype": "float64",
|
41 |
"_type": "Value"
|
42 |
-
},
|
43 |
-
"other_attributes": {
|
44 |
-
"youtube_name": {
|
45 |
-
"dtype": "string",
|
46 |
-
"_type": "Value"
|
47 |
-
},
|
48 |
-
"start_time_in_sec": {
|
49 |
-
"dtype": "float32",
|
50 |
-
"_type": "Value"
|
51 |
-
},
|
52 |
-
"end_time_in_sec": {
|
53 |
-
"dtype": "float32",
|
54 |
-
"_type": "Value"
|
55 |
-
},
|
56 |
-
"link": {
|
57 |
-
"dtype": "string",
|
58 |
-
"_type": "Value"
|
59 |
-
},
|
60 |
-
"percentage_of_malay": {
|
61 |
-
"dtype": "string",
|
62 |
-
"_type": "Value"
|
63 |
-
}
|
64 |
}
|
65 |
},
|
66 |
"homepage": "",
|
|
|
39 |
"audio_length": {
|
40 |
"dtype": "float64",
|
41 |
"_type": "Value"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
}
|
43 |
},
|
44 |
"homepage": "",
|
examples/ytb_sds_batch3_malay/state.json
CHANGED
@@ -4,7 +4,7 @@
|
|
4 |
"filename": "data-00000-of-00001.arrow"
|
5 |
}
|
6 |
],
|
7 |
-
"_fingerprint": "
|
8 |
"_format_columns": null,
|
9 |
"_format_kwargs": {},
|
10 |
"_format_type": null,
|
|
|
4 |
"filename": "data-00000-of-00001.arrow"
|
5 |
}
|
6 |
],
|
7 |
+
"_fingerprint": "908fa2e876912bda",
|
8 |
"_format_columns": null,
|
9 |
"_format_kwargs": {},
|
10 |
"_format_type": null,
|
examples/ytb_sds_batch3_ms_en/data-00000-of-00001.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c5379c93c60062a5ef603be99787cdf1e2cf654291e9abb888fe2828cf1b4a83
|
3 |
+
size 471624
|
examples/ytb_sds_batch3_ms_en/dataset_info.json
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"citation": "",
|
3 |
+
"description": "",
|
4 |
+
"features": {
|
5 |
+
"context": {
|
6 |
+
"text": {
|
7 |
+
"dtype": "string",
|
8 |
+
"_type": "Value"
|
9 |
+
},
|
10 |
+
"audio": {
|
11 |
+
"sampling_rate": 16000,
|
12 |
+
"_type": "Audio"
|
13 |
+
}
|
14 |
+
},
|
15 |
+
"instruction": {
|
16 |
+
"text": {
|
17 |
+
"dtype": "string",
|
18 |
+
"_type": "Value"
|
19 |
+
},
|
20 |
+
"audio": {
|
21 |
+
"sampling_rate": 16000,
|
22 |
+
"_type": "Audio"
|
23 |
+
}
|
24 |
+
},
|
25 |
+
"answer": {
|
26 |
+
"text": {
|
27 |
+
"dtype": "string",
|
28 |
+
"_type": "Value"
|
29 |
+
},
|
30 |
+
"audio": {
|
31 |
+
"sampling_rate": 16000,
|
32 |
+
"_type": "Audio"
|
33 |
+
}
|
34 |
+
},
|
35 |
+
"language": {
|
36 |
+
"dtype": "string",
|
37 |
+
"_type": "Value"
|
38 |
+
},
|
39 |
+
"audio_length": {
|
40 |
+
"dtype": "float64",
|
41 |
+
"_type": "Value"
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"homepage": "",
|
45 |
+
"license": ""
|
46 |
+
}
|
examples/ytb_sds_batch3_ms_en/state.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_data_files": [
|
3 |
+
{
|
4 |
+
"filename": "data-00000-of-00001.arrow"
|
5 |
+
}
|
6 |
+
],
|
7 |
+
"_fingerprint": "0b7560df0446fc80",
|
8 |
+
"_format_columns": null,
|
9 |
+
"_format_kwargs": {},
|
10 |
+
"_format_type": null,
|
11 |
+
"_output_all_columns": false,
|
12 |
+
"_split": null
|
13 |
+
}
|
examples/ytb_sds_batch3_tamil/data-00000-of-00001.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d6122b9f9c788fd5c1a41487b39451646c205e5531ec656ddd177dc216c3f64
|
3 |
+
size 422976
|
examples/ytb_sds_batch3_tamil/dataset_info.json
CHANGED
@@ -39,28 +39,6 @@
|
|
39 |
"audio_length": {
|
40 |
"dtype": "float64",
|
41 |
"_type": "Value"
|
42 |
-
},
|
43 |
-
"other_attributes": {
|
44 |
-
"youtube_name": {
|
45 |
-
"dtype": "string",
|
46 |
-
"_type": "Value"
|
47 |
-
},
|
48 |
-
"start_time_in_sec": {
|
49 |
-
"dtype": "float32",
|
50 |
-
"_type": "Value"
|
51 |
-
},
|
52 |
-
"end_time_in_sec": {
|
53 |
-
"dtype": "float32",
|
54 |
-
"_type": "Value"
|
55 |
-
},
|
56 |
-
"link": {
|
57 |
-
"dtype": "string",
|
58 |
-
"_type": "Value"
|
59 |
-
},
|
60 |
-
"percentage_of_tamil": {
|
61 |
-
"dtype": "string",
|
62 |
-
"_type": "Value"
|
63 |
-
}
|
64 |
}
|
65 |
},
|
66 |
"homepage": "",
|
|
|
39 |
"audio_length": {
|
40 |
"dtype": "float64",
|
41 |
"_type": "Value"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
}
|
43 |
},
|
44 |
"homepage": "",
|
examples/ytb_sds_batch3_tamil/state.json
CHANGED
@@ -4,7 +4,7 @@
|
|
4 |
"filename": "data-00000-of-00001.arrow"
|
5 |
}
|
6 |
],
|
7 |
-
"_fingerprint": "
|
8 |
"_format_columns": null,
|
9 |
"_format_kwargs": {},
|
10 |
"_format_type": null,
|
|
|
4 |
"filename": "data-00000-of-00001.arrow"
|
5 |
}
|
6 |
],
|
7 |
+
"_fingerprint": "3b85be829d277232",
|
8 |
"_format_columns": null,
|
9 |
"_format_kwargs": {},
|
10 |
"_format_type": null,
|
examples/ytb_sds_batch3_zh_en/data-00000-of-00001.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2db4c8a847e99be0c1f1852e713d9a18c569eabd9a2468155c0a191df7b24382
|
3 |
+
size 427944
|
examples/ytb_sds_batch3_zh_en/dataset_info.json
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"citation": "",
|
3 |
+
"description": "",
|
4 |
+
"features": {
|
5 |
+
"context": {
|
6 |
+
"text": {
|
7 |
+
"dtype": "string",
|
8 |
+
"_type": "Value"
|
9 |
+
},
|
10 |
+
"audio": {
|
11 |
+
"sampling_rate": 16000,
|
12 |
+
"_type": "Audio"
|
13 |
+
}
|
14 |
+
},
|
15 |
+
"instruction": {
|
16 |
+
"text": {
|
17 |
+
"dtype": "string",
|
18 |
+
"_type": "Value"
|
19 |
+
},
|
20 |
+
"audio": {
|
21 |
+
"sampling_rate": 16000,
|
22 |
+
"_type": "Audio"
|
23 |
+
}
|
24 |
+
},
|
25 |
+
"answer": {
|
26 |
+
"text": {
|
27 |
+
"dtype": "string",
|
28 |
+
"_type": "Value"
|
29 |
+
},
|
30 |
+
"audio": {
|
31 |
+
"sampling_rate": 16000,
|
32 |
+
"_type": "Audio"
|
33 |
+
}
|
34 |
+
},
|
35 |
+
"language": {
|
36 |
+
"dtype": "string",
|
37 |
+
"_type": "Value"
|
38 |
+
},
|
39 |
+
"audio_length": {
|
40 |
+
"dtype": "float64",
|
41 |
+
"_type": "Value"
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"homepage": "",
|
45 |
+
"license": ""
|
46 |
+
}
|
examples/ytb_sds_batch3_zh_en/state.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_data_files": [
|
3 |
+
{
|
4 |
+
"filename": "data-00000-of-00001.arrow"
|
5 |
+
}
|
6 |
+
],
|
7 |
+
"_fingerprint": "ed6654cfb22030f1",
|
8 |
+
"_format_columns": null,
|
9 |
+
"_format_kwargs": {},
|
10 |
+
"_format_type": null,
|
11 |
+
"_output_all_columns": false,
|
12 |
+
"_split": null
|
13 |
+
}
|
examples/ytb_sqa_batch3_malay/data-00000-of-00001.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:65537dca9bf4e864da5735abbeb9c7d05fdc2a4da4f7026ba7a8625ea8ec8488
|
3 |
+
size 522560
|
examples/ytb_sqa_batch3_malay/dataset_info.json
CHANGED
@@ -39,28 +39,6 @@
|
|
39 |
"audio_length": {
|
40 |
"dtype": "float64",
|
41 |
"_type": "Value"
|
42 |
-
},
|
43 |
-
"other_attributes": {
|
44 |
-
"youtube_name": {
|
45 |
-
"dtype": "string",
|
46 |
-
"_type": "Value"
|
47 |
-
},
|
48 |
-
"start_time_in_sec": {
|
49 |
-
"dtype": "float32",
|
50 |
-
"_type": "Value"
|
51 |
-
},
|
52 |
-
"end_time_in_sec": {
|
53 |
-
"dtype": "float32",
|
54 |
-
"_type": "Value"
|
55 |
-
},
|
56 |
-
"link": {
|
57 |
-
"dtype": "string",
|
58 |
-
"_type": "Value"
|
59 |
-
},
|
60 |
-
"percentage_of_malay": {
|
61 |
-
"dtype": "string",
|
62 |
-
"_type": "Value"
|
63 |
-
}
|
64 |
}
|
65 |
},
|
66 |
"homepage": "",
|
|
|
39 |
"audio_length": {
|
40 |
"dtype": "float64",
|
41 |
"_type": "Value"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
}
|
43 |
},
|
44 |
"homepage": "",
|
examples/ytb_sqa_batch3_malay/state.json
CHANGED
@@ -4,7 +4,7 @@
|
|
4 |
"filename": "data-00000-of-00001.arrow"
|
5 |
}
|
6 |
],
|
7 |
-
"_fingerprint": "
|
8 |
"_format_columns": null,
|
9 |
"_format_kwargs": {},
|
10 |
"_format_type": null,
|
|
|
4 |
"filename": "data-00000-of-00001.arrow"
|
5 |
}
|
6 |
],
|
7 |
+
"_fingerprint": "a649ce1ef1dbae49",
|
8 |
"_format_columns": null,
|
9 |
"_format_kwargs": {},
|
10 |
"_format_type": null,
|
examples/ytb_sqa_batch3_ms_en/data-00000-of-00001.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ae02cfd4136fddda700691053d522dbb5fa918437be53957091dcc58cc39544e
|
3 |
+
size 449184
|
examples/ytb_sqa_batch3_ms_en/dataset_info.json
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"citation": "",
|
3 |
+
"description": "",
|
4 |
+
"features": {
|
5 |
+
"context": {
|
6 |
+
"text": {
|
7 |
+
"dtype": "string",
|
8 |
+
"_type": "Value"
|
9 |
+
},
|
10 |
+
"audio": {
|
11 |
+
"sampling_rate": 16000,
|
12 |
+
"_type": "Audio"
|
13 |
+
}
|
14 |
+
},
|
15 |
+
"instruction": {
|
16 |
+
"text": {
|
17 |
+
"dtype": "string",
|
18 |
+
"_type": "Value"
|
19 |
+
},
|
20 |
+
"audio": {
|
21 |
+
"sampling_rate": 16000,
|
22 |
+
"_type": "Audio"
|
23 |
+
}
|
24 |
+
},
|
25 |
+
"answer": {
|
26 |
+
"text": {
|
27 |
+
"dtype": "string",
|
28 |
+
"_type": "Value"
|
29 |
+
},
|
30 |
+
"audio": {
|
31 |
+
"sampling_rate": 16000,
|
32 |
+
"_type": "Audio"
|
33 |
+
}
|
34 |
+
},
|
35 |
+
"language": {
|
36 |
+
"dtype": "string",
|
37 |
+
"_type": "Value"
|
38 |
+
},
|
39 |
+
"audio_length": {
|
40 |
+
"dtype": "float64",
|
41 |
+
"_type": "Value"
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"homepage": "",
|
45 |
+
"license": ""
|
46 |
+
}
|
examples/ytb_sqa_batch3_ms_en/state.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_data_files": [
|
3 |
+
{
|
4 |
+
"filename": "data-00000-of-00001.arrow"
|
5 |
+
}
|
6 |
+
],
|
7 |
+
"_fingerprint": "29050d8340f4d543",
|
8 |
+
"_format_columns": null,
|
9 |
+
"_format_kwargs": {},
|
10 |
+
"_format_type": null,
|
11 |
+
"_output_all_columns": false,
|
12 |
+
"_split": null
|
13 |
+
}
|
examples/ytb_sqa_batch3_tamil/data-00000-of-00001.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:716516113f010b22552492d834a5ef4b2c612a95794ac284868d95c470c6c088
|
3 |
+
size 459952
|
examples/ytb_sqa_batch3_tamil/dataset_info.json
CHANGED
@@ -39,28 +39,6 @@
|
|
39 |
"audio_length": {
|
40 |
"dtype": "float64",
|
41 |
"_type": "Value"
|
42 |
-
},
|
43 |
-
"other_attributes": {
|
44 |
-
"youtube_name": {
|
45 |
-
"dtype": "string",
|
46 |
-
"_type": "Value"
|
47 |
-
},
|
48 |
-
"start_time_in_sec": {
|
49 |
-
"dtype": "float32",
|
50 |
-
"_type": "Value"
|
51 |
-
},
|
52 |
-
"end_time_in_sec": {
|
53 |
-
"dtype": "float32",
|
54 |
-
"_type": "Value"
|
55 |
-
},
|
56 |
-
"link": {
|
57 |
-
"dtype": "string",
|
58 |
-
"_type": "Value"
|
59 |
-
},
|
60 |
-
"percentage_of_tamil": {
|
61 |
-
"dtype": "string",
|
62 |
-
"_type": "Value"
|
63 |
-
}
|
64 |
}
|
65 |
},
|
66 |
"homepage": "",
|
|
|
39 |
"audio_length": {
|
40 |
"dtype": "float64",
|
41 |
"_type": "Value"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
}
|
43 |
},
|
44 |
"homepage": "",
|
examples/ytb_sqa_batch3_tamil/state.json
CHANGED
@@ -4,7 +4,7 @@
|
|
4 |
"filename": "data-00000-of-00001.arrow"
|
5 |
}
|
6 |
],
|
7 |
-
"_fingerprint": "
|
8 |
"_format_columns": null,
|
9 |
"_format_kwargs": {},
|
10 |
"_format_type": null,
|
|
|
4 |
"filename": "data-00000-of-00001.arrow"
|
5 |
}
|
6 |
],
|
7 |
+
"_fingerprint": "d09b8f559f6fd39a",
|
8 |
"_format_columns": null,
|
9 |
"_format_kwargs": {},
|
10 |
"_format_type": null,
|
examples/ytb_sqa_batch3_zh_en/data-00000-of-00001.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:063c4ef96e3a269d352c9739210f67b5be7ef4db22ce18df669581e671eb5116
|
3 |
+
size 330536
|
examples/ytb_sqa_batch3_zh_en/dataset_info.json
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"citation": "",
|
3 |
+
"description": "",
|
4 |
+
"features": {
|
5 |
+
"context": {
|
6 |
+
"text": {
|
7 |
+
"dtype": "string",
|
8 |
+
"_type": "Value"
|
9 |
+
},
|
10 |
+
"audio": {
|
11 |
+
"sampling_rate": 16000,
|
12 |
+
"_type": "Audio"
|
13 |
+
}
|
14 |
+
},
|
15 |
+
"instruction": {
|
16 |
+
"text": {
|
17 |
+
"dtype": "string",
|
18 |
+
"_type": "Value"
|
19 |
+
},
|
20 |
+
"audio": {
|
21 |
+
"sampling_rate": 16000,
|
22 |
+
"_type": "Audio"
|
23 |
+
}
|
24 |
+
},
|
25 |
+
"answer": {
|
26 |
+
"text": {
|
27 |
+
"dtype": "string",
|
28 |
+
"_type": "Value"
|
29 |
+
},
|
30 |
+
"audio": {
|
31 |
+
"sampling_rate": 16000,
|
32 |
+
"_type": "Audio"
|
33 |
+
}
|
34 |
+
},
|
35 |
+
"language": {
|
36 |
+
"dtype": "string",
|
37 |
+
"_type": "Value"
|
38 |
+
},
|
39 |
+
"audio_length": {
|
40 |
+
"dtype": "float64",
|
41 |
+
"_type": "Value"
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"homepage": "",
|
45 |
+
"license": ""
|
46 |
+
}
|
examples/ytb_sqa_batch3_zh_en/state.json
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_data_files": [
|
3 |
+
{
|
4 |
+
"filename": "data-00000-of-00001.arrow"
|
5 |
+
}
|
6 |
+
],
|
7 |
+
"_fingerprint": "9246b136a32aff2c",
|
8 |
+
"_format_columns": null,
|
9 |
+
"_format_kwargs": {},
|
10 |
+
"_format_type": null,
|
11 |
+
"_output_all_columns": false,
|
12 |
+
"_split": null
|
13 |
+
}
|
examples/{ds_name}/data-00000-of-00001.arrow
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8ff81b8aa00b0cc8854ae8d977542a03c94a8da90e773a38681dfb10465e7242
|
3 |
+
size 461672
|
examples/{ds_name}/dataset_info.json
ADDED
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"citation": "",
|
3 |
+
"description": "",
|
4 |
+
"features": {
|
5 |
+
"context": {
|
6 |
+
"text": {
|
7 |
+
"dtype": "string",
|
8 |
+
"_type": "Value"
|
9 |
+
},
|
10 |
+
"audio": {
|
11 |
+
"sampling_rate": 16000,
|
12 |
+
"_type": "Audio"
|
13 |
+
}
|
14 |
+
},
|
15 |
+
"instruction": {
|
16 |
+
"text": {
|
17 |
+
"dtype": "string",
|
18 |
+
"_type": "Value"
|
19 |
+
},
|
20 |
+
"audio": {
|
21 |
+
"sampling_rate": 16000,
|
22 |
+
"_type": "Audio"
|
23 |
+
}
|
24 |
+
},
|
25 |
+
"answer": {
|
26 |
+
"text": {
|
27 |
+
"dtype": "string",
|
28 |
+
"_type": "Value"
|
29 |
+
},
|
30 |
+
"audio": {
|
31 |
+
"sampling_rate": 16000,
|
32 |
+
"_type": "Audio"
|
33 |
+
}
|
34 |
+
},
|
35 |
+
"language": {
|
36 |
+
"dtype": "string",
|
37 |
+
"_type": "Value"
|
38 |
+
},
|
39 |
+
"audio_length": {
|
40 |
+
"dtype": "float64",
|
41 |
+
"_type": "Value"
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"homepage": "",
|
45 |
+
"license": ""
|
46 |
+
}
|
examples/{ds_name}/state.json
ADDED
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_data_files": [
|
3 |
+
{
|
4 |
+
"filename": "data-00000-of-00001.arrow"
|
5 |
+
}
|
6 |
+
],
|
7 |
+
"_fingerprint": "2870b4932cfdd640",
|
8 |
+
"_format_columns": [
|
9 |
+
"answer",
|
10 |
+
"audio_length",
|
11 |
+
"context",
|
12 |
+
"instruction",
|
13 |
+
"language"
|
14 |
+
],
|
15 |
+
"_format_kwargs": {},
|
16 |
+
"_format_type": null,
|
17 |
+
"_output_all_columns": false,
|
18 |
+
"_split": null
|
19 |
+
}
|