zhuohan-7 commited on
Commit
d4e2b9d
·
verified ·
1 Parent(s): d931f3e

Upload folder using huggingface_hub

Browse files
Files changed (39) hide show
  1. examples/ytb_asr_batch3_ms_en/data-00000-of-00001.arrow +3 -0
  2. examples/ytb_asr_batch3_ms_en/dataset_info.json +46 -0
  3. examples/ytb_asr_batch3_ms_en/state.json +13 -0
  4. examples/ytb_asr_batch3_ta_en/data-00000-of-00001.arrow +3 -0
  5. examples/ytb_asr_batch3_ta_en/dataset_info.json +46 -0
  6. examples/ytb_asr_batch3_ta_en/state.json +13 -0
  7. examples/ytb_asr_batch3_zh_en/data-00000-of-00001.arrow +3 -0
  8. examples/ytb_asr_batch3_zh_en/dataset_info.json +68 -0
  9. examples/ytb_asr_batch3_zh_en/state.json +13 -0
  10. examples/ytb_sds_batch3_chinese/data-00000-of-00001.arrow +3 -0
  11. examples/ytb_sds_batch3_chinese/dataset_info.json +0 -22
  12. examples/ytb_sds_batch3_chinese/state.json +8 -2
  13. examples/ytb_sds_batch3_malay/data-00000-of-00001.arrow +3 -0
  14. examples/ytb_sds_batch3_malay/dataset_info.json +0 -22
  15. examples/ytb_sds_batch3_malay/state.json +1 -1
  16. examples/ytb_sds_batch3_ms_en/data-00000-of-00001.arrow +3 -0
  17. examples/ytb_sds_batch3_ms_en/dataset_info.json +46 -0
  18. examples/ytb_sds_batch3_ms_en/state.json +13 -0
  19. examples/ytb_sds_batch3_tamil/data-00000-of-00001.arrow +3 -0
  20. examples/ytb_sds_batch3_tamil/dataset_info.json +0 -22
  21. examples/ytb_sds_batch3_tamil/state.json +1 -1
  22. examples/ytb_sds_batch3_zh_en/data-00000-of-00001.arrow +3 -0
  23. examples/ytb_sds_batch3_zh_en/dataset_info.json +46 -0
  24. examples/ytb_sds_batch3_zh_en/state.json +13 -0
  25. examples/ytb_sqa_batch3_malay/data-00000-of-00001.arrow +3 -0
  26. examples/ytb_sqa_batch3_malay/dataset_info.json +0 -22
  27. examples/ytb_sqa_batch3_malay/state.json +1 -1
  28. examples/ytb_sqa_batch3_ms_en/data-00000-of-00001.arrow +3 -0
  29. examples/ytb_sqa_batch3_ms_en/dataset_info.json +46 -0
  30. examples/ytb_sqa_batch3_ms_en/state.json +13 -0
  31. examples/ytb_sqa_batch3_tamil/data-00000-of-00001.arrow +3 -0
  32. examples/ytb_sqa_batch3_tamil/dataset_info.json +0 -22
  33. examples/ytb_sqa_batch3_tamil/state.json +1 -1
  34. examples/ytb_sqa_batch3_zh_en/data-00000-of-00001.arrow +3 -0
  35. examples/ytb_sqa_batch3_zh_en/dataset_info.json +46 -0
  36. examples/ytb_sqa_batch3_zh_en/state.json +13 -0
  37. examples/{ds_name}/data-00000-of-00001.arrow +3 -0
  38. examples/{ds_name}/dataset_info.json +46 -0
  39. examples/{ds_name}/state.json +19 -0
examples/ytb_asr_batch3_ms_en/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:208c237357205ca335366be040f3b0942a2b74c9e02fc8a1f6cc129a753af1ad
3
+ size 456232
examples/ytb_asr_batch3_ms_en/dataset_info.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "context": {
6
+ "text": {
7
+ "dtype": "string",
8
+ "_type": "Value"
9
+ },
10
+ "audio": {
11
+ "sampling_rate": 16000,
12
+ "_type": "Audio"
13
+ }
14
+ },
15
+ "instruction": {
16
+ "text": {
17
+ "dtype": "string",
18
+ "_type": "Value"
19
+ },
20
+ "audio": {
21
+ "sampling_rate": 16000,
22
+ "_type": "Audio"
23
+ }
24
+ },
25
+ "answer": {
26
+ "text": {
27
+ "dtype": "string",
28
+ "_type": "Value"
29
+ },
30
+ "audio": {
31
+ "sampling_rate": 16000,
32
+ "_type": "Audio"
33
+ }
34
+ },
35
+ "language": {
36
+ "dtype": "string",
37
+ "_type": "Value"
38
+ },
39
+ "audio_length": {
40
+ "dtype": "float64",
41
+ "_type": "Value"
42
+ }
43
+ },
44
+ "homepage": "",
45
+ "license": ""
46
+ }
examples/ytb_asr_batch3_ms_en/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "9477ebec80490c46",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": null
13
+ }
examples/ytb_asr_batch3_ta_en/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69bbb5cb413cf1f1f6644b16f9472f903f13ee8e355d34e00242238cbfa49247
3
+ size 1381736
examples/ytb_asr_batch3_ta_en/dataset_info.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "context": {
6
+ "text": {
7
+ "dtype": "string",
8
+ "_type": "Value"
9
+ },
10
+ "audio": {
11
+ "sampling_rate": 16000,
12
+ "_type": "Audio"
13
+ }
14
+ },
15
+ "instruction": {
16
+ "text": {
17
+ "dtype": "string",
18
+ "_type": "Value"
19
+ },
20
+ "audio": {
21
+ "sampling_rate": 16000,
22
+ "_type": "Audio"
23
+ }
24
+ },
25
+ "answer": {
26
+ "text": {
27
+ "dtype": "string",
28
+ "_type": "Value"
29
+ },
30
+ "audio": {
31
+ "sampling_rate": 16000,
32
+ "_type": "Audio"
33
+ }
34
+ },
35
+ "language": {
36
+ "dtype": "string",
37
+ "_type": "Value"
38
+ },
39
+ "audio_length": {
40
+ "dtype": "float64",
41
+ "_type": "Value"
42
+ }
43
+ },
44
+ "homepage": "",
45
+ "license": ""
46
+ }
examples/ytb_asr_batch3_ta_en/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "91fc9969182cd36e",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": null
13
+ }
examples/ytb_asr_batch3_zh_en/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18a952214ba1c5c6519ce77b49aada2d250650fd70cc1270e812b62ad1cc6c4d
3
+ size 387600
examples/ytb_asr_batch3_zh_en/dataset_info.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "context": {
6
+ "text": {
7
+ "dtype": "string",
8
+ "_type": "Value"
9
+ },
10
+ "audio": {
11
+ "sampling_rate": 16000,
12
+ "_type": "Audio"
13
+ }
14
+ },
15
+ "instruction": {
16
+ "text": {
17
+ "dtype": "string",
18
+ "_type": "Value"
19
+ },
20
+ "audio": {
21
+ "sampling_rate": 16000,
22
+ "_type": "Audio"
23
+ }
24
+ },
25
+ "answer": {
26
+ "text": {
27
+ "dtype": "string",
28
+ "_type": "Value"
29
+ },
30
+ "audio": {
31
+ "sampling_rate": 16000,
32
+ "_type": "Audio"
33
+ }
34
+ },
35
+ "language": {
36
+ "dtype": "string",
37
+ "_type": "Value"
38
+ },
39
+ "audio_length": {
40
+ "dtype": "float64",
41
+ "_type": "Value"
42
+ },
43
+ "other_attributes": {
44
+ "youtube_name": {
45
+ "dtype": "string",
46
+ "_type": "Value"
47
+ },
48
+ "start_time_in_sec": {
49
+ "dtype": "float32",
50
+ "_type": "Value"
51
+ },
52
+ "end_time_in_sec": {
53
+ "dtype": "float32",
54
+ "_type": "Value"
55
+ },
56
+ "link": {
57
+ "dtype": "string",
58
+ "_type": "Value"
59
+ },
60
+ "percentage_of_chinese": {
61
+ "dtype": "string",
62
+ "_type": "Value"
63
+ }
64
+ }
65
+ },
66
+ "homepage": "",
67
+ "license": ""
68
+ }
examples/ytb_asr_batch3_zh_en/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "0aa769aa06bf3bf6",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": null
13
+ }
examples/ytb_sds_batch3_chinese/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dfc9d9b3cb2c52535641e6e080aae7608bdea7793a75b0d14cf5e3c2cdebf26
3
+ size 305136
examples/ytb_sds_batch3_chinese/dataset_info.json CHANGED
@@ -39,28 +39,6 @@
39
  "audio_length": {
40
  "dtype": "float64",
41
  "_type": "Value"
42
- },
43
- "other_attributes": {
44
- "youtube_name": {
45
- "dtype": "string",
46
- "_type": "Value"
47
- },
48
- "start_time_in_sec": {
49
- "dtype": "float32",
50
- "_type": "Value"
51
- },
52
- "end_time_in_sec": {
53
- "dtype": "float32",
54
- "_type": "Value"
55
- },
56
- "link": {
57
- "dtype": "string",
58
- "_type": "Value"
59
- },
60
- "percentage_of_chinese": {
61
- "dtype": "string",
62
- "_type": "Value"
63
- }
64
  }
65
  },
66
  "homepage": "",
 
39
  "audio_length": {
40
  "dtype": "float64",
41
  "_type": "Value"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  }
43
  },
44
  "homepage": "",
examples/ytb_sds_batch3_chinese/state.json CHANGED
@@ -4,8 +4,14 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "5525a31ce5d1f794",
8
- "_format_columns": null,
 
 
 
 
 
 
9
  "_format_kwargs": {},
10
  "_format_type": null,
11
  "_output_all_columns": false,
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "4e7730924a13bda6",
8
+ "_format_columns": [
9
+ "answer",
10
+ "audio_length",
11
+ "context",
12
+ "instruction",
13
+ "language"
14
+ ],
15
  "_format_kwargs": {},
16
  "_format_type": null,
17
  "_output_all_columns": false,
examples/ytb_sds_batch3_malay/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44db0547d89fc3b1eda39d05d1d2cc69975e65815b9a9e5bb68937f6b54b3808
3
+ size 423552
examples/ytb_sds_batch3_malay/dataset_info.json CHANGED
@@ -39,28 +39,6 @@
39
  "audio_length": {
40
  "dtype": "float64",
41
  "_type": "Value"
42
- },
43
- "other_attributes": {
44
- "youtube_name": {
45
- "dtype": "string",
46
- "_type": "Value"
47
- },
48
- "start_time_in_sec": {
49
- "dtype": "float32",
50
- "_type": "Value"
51
- },
52
- "end_time_in_sec": {
53
- "dtype": "float32",
54
- "_type": "Value"
55
- },
56
- "link": {
57
- "dtype": "string",
58
- "_type": "Value"
59
- },
60
- "percentage_of_malay": {
61
- "dtype": "string",
62
- "_type": "Value"
63
- }
64
  }
65
  },
66
  "homepage": "",
 
39
  "audio_length": {
40
  "dtype": "float64",
41
  "_type": "Value"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  }
43
  },
44
  "homepage": "",
examples/ytb_sds_batch3_malay/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "b3847ba8a7a25ab2",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "908fa2e876912bda",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
examples/ytb_sds_batch3_ms_en/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5379c93c60062a5ef603be99787cdf1e2cf654291e9abb888fe2828cf1b4a83
3
+ size 471624
examples/ytb_sds_batch3_ms_en/dataset_info.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "context": {
6
+ "text": {
7
+ "dtype": "string",
8
+ "_type": "Value"
9
+ },
10
+ "audio": {
11
+ "sampling_rate": 16000,
12
+ "_type": "Audio"
13
+ }
14
+ },
15
+ "instruction": {
16
+ "text": {
17
+ "dtype": "string",
18
+ "_type": "Value"
19
+ },
20
+ "audio": {
21
+ "sampling_rate": 16000,
22
+ "_type": "Audio"
23
+ }
24
+ },
25
+ "answer": {
26
+ "text": {
27
+ "dtype": "string",
28
+ "_type": "Value"
29
+ },
30
+ "audio": {
31
+ "sampling_rate": 16000,
32
+ "_type": "Audio"
33
+ }
34
+ },
35
+ "language": {
36
+ "dtype": "string",
37
+ "_type": "Value"
38
+ },
39
+ "audio_length": {
40
+ "dtype": "float64",
41
+ "_type": "Value"
42
+ }
43
+ },
44
+ "homepage": "",
45
+ "license": ""
46
+ }
examples/ytb_sds_batch3_ms_en/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "0b7560df0446fc80",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": null
13
+ }
examples/ytb_sds_batch3_tamil/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d6122b9f9c788fd5c1a41487b39451646c205e5531ec656ddd177dc216c3f64
3
+ size 422976
examples/ytb_sds_batch3_tamil/dataset_info.json CHANGED
@@ -39,28 +39,6 @@
39
  "audio_length": {
40
  "dtype": "float64",
41
  "_type": "Value"
42
- },
43
- "other_attributes": {
44
- "youtube_name": {
45
- "dtype": "string",
46
- "_type": "Value"
47
- },
48
- "start_time_in_sec": {
49
- "dtype": "float32",
50
- "_type": "Value"
51
- },
52
- "end_time_in_sec": {
53
- "dtype": "float32",
54
- "_type": "Value"
55
- },
56
- "link": {
57
- "dtype": "string",
58
- "_type": "Value"
59
- },
60
- "percentage_of_tamil": {
61
- "dtype": "string",
62
- "_type": "Value"
63
- }
64
  }
65
  },
66
  "homepage": "",
 
39
  "audio_length": {
40
  "dtype": "float64",
41
  "_type": "Value"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  }
43
  },
44
  "homepage": "",
examples/ytb_sds_batch3_tamil/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "4b552c5c1c6b2339",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "3b85be829d277232",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
examples/ytb_sds_batch3_zh_en/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2db4c8a847e99be0c1f1852e713d9a18c569eabd9a2468155c0a191df7b24382
3
+ size 427944
examples/ytb_sds_batch3_zh_en/dataset_info.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "context": {
6
+ "text": {
7
+ "dtype": "string",
8
+ "_type": "Value"
9
+ },
10
+ "audio": {
11
+ "sampling_rate": 16000,
12
+ "_type": "Audio"
13
+ }
14
+ },
15
+ "instruction": {
16
+ "text": {
17
+ "dtype": "string",
18
+ "_type": "Value"
19
+ },
20
+ "audio": {
21
+ "sampling_rate": 16000,
22
+ "_type": "Audio"
23
+ }
24
+ },
25
+ "answer": {
26
+ "text": {
27
+ "dtype": "string",
28
+ "_type": "Value"
29
+ },
30
+ "audio": {
31
+ "sampling_rate": 16000,
32
+ "_type": "Audio"
33
+ }
34
+ },
35
+ "language": {
36
+ "dtype": "string",
37
+ "_type": "Value"
38
+ },
39
+ "audio_length": {
40
+ "dtype": "float64",
41
+ "_type": "Value"
42
+ }
43
+ },
44
+ "homepage": "",
45
+ "license": ""
46
+ }
examples/ytb_sds_batch3_zh_en/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "ed6654cfb22030f1",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": null
13
+ }
examples/ytb_sqa_batch3_malay/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65537dca9bf4e864da5735abbeb9c7d05fdc2a4da4f7026ba7a8625ea8ec8488
3
+ size 522560
examples/ytb_sqa_batch3_malay/dataset_info.json CHANGED
@@ -39,28 +39,6 @@
39
  "audio_length": {
40
  "dtype": "float64",
41
  "_type": "Value"
42
- },
43
- "other_attributes": {
44
- "youtube_name": {
45
- "dtype": "string",
46
- "_type": "Value"
47
- },
48
- "start_time_in_sec": {
49
- "dtype": "float32",
50
- "_type": "Value"
51
- },
52
- "end_time_in_sec": {
53
- "dtype": "float32",
54
- "_type": "Value"
55
- },
56
- "link": {
57
- "dtype": "string",
58
- "_type": "Value"
59
- },
60
- "percentage_of_malay": {
61
- "dtype": "string",
62
- "_type": "Value"
63
- }
64
  }
65
  },
66
  "homepage": "",
 
39
  "audio_length": {
40
  "dtype": "float64",
41
  "_type": "Value"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  }
43
  },
44
  "homepage": "",
examples/ytb_sqa_batch3_malay/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "752dc89a356333ed",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "a649ce1ef1dbae49",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
examples/ytb_sqa_batch3_ms_en/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ae02cfd4136fddda700691053d522dbb5fa918437be53957091dcc58cc39544e
3
+ size 449184
examples/ytb_sqa_batch3_ms_en/dataset_info.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "context": {
6
+ "text": {
7
+ "dtype": "string",
8
+ "_type": "Value"
9
+ },
10
+ "audio": {
11
+ "sampling_rate": 16000,
12
+ "_type": "Audio"
13
+ }
14
+ },
15
+ "instruction": {
16
+ "text": {
17
+ "dtype": "string",
18
+ "_type": "Value"
19
+ },
20
+ "audio": {
21
+ "sampling_rate": 16000,
22
+ "_type": "Audio"
23
+ }
24
+ },
25
+ "answer": {
26
+ "text": {
27
+ "dtype": "string",
28
+ "_type": "Value"
29
+ },
30
+ "audio": {
31
+ "sampling_rate": 16000,
32
+ "_type": "Audio"
33
+ }
34
+ },
35
+ "language": {
36
+ "dtype": "string",
37
+ "_type": "Value"
38
+ },
39
+ "audio_length": {
40
+ "dtype": "float64",
41
+ "_type": "Value"
42
+ }
43
+ },
44
+ "homepage": "",
45
+ "license": ""
46
+ }
examples/ytb_sqa_batch3_ms_en/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "29050d8340f4d543",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": null
13
+ }
examples/ytb_sqa_batch3_tamil/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:716516113f010b22552492d834a5ef4b2c612a95794ac284868d95c470c6c088
3
+ size 459952
examples/ytb_sqa_batch3_tamil/dataset_info.json CHANGED
@@ -39,28 +39,6 @@
39
  "audio_length": {
40
  "dtype": "float64",
41
  "_type": "Value"
42
- },
43
- "other_attributes": {
44
- "youtube_name": {
45
- "dtype": "string",
46
- "_type": "Value"
47
- },
48
- "start_time_in_sec": {
49
- "dtype": "float32",
50
- "_type": "Value"
51
- },
52
- "end_time_in_sec": {
53
- "dtype": "float32",
54
- "_type": "Value"
55
- },
56
- "link": {
57
- "dtype": "string",
58
- "_type": "Value"
59
- },
60
- "percentage_of_tamil": {
61
- "dtype": "string",
62
- "_type": "Value"
63
- }
64
  }
65
  },
66
  "homepage": "",
 
39
  "audio_length": {
40
  "dtype": "float64",
41
  "_type": "Value"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  }
43
  },
44
  "homepage": "",
examples/ytb_sqa_batch3_tamil/state.json CHANGED
@@ -4,7 +4,7 @@
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
- "_fingerprint": "4a255eac2d3b2c4d",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
 
4
  "filename": "data-00000-of-00001.arrow"
5
  }
6
  ],
7
+ "_fingerprint": "d09b8f559f6fd39a",
8
  "_format_columns": null,
9
  "_format_kwargs": {},
10
  "_format_type": null,
examples/ytb_sqa_batch3_zh_en/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:063c4ef96e3a269d352c9739210f67b5be7ef4db22ce18df669581e671eb5116
3
+ size 330536
examples/ytb_sqa_batch3_zh_en/dataset_info.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "context": {
6
+ "text": {
7
+ "dtype": "string",
8
+ "_type": "Value"
9
+ },
10
+ "audio": {
11
+ "sampling_rate": 16000,
12
+ "_type": "Audio"
13
+ }
14
+ },
15
+ "instruction": {
16
+ "text": {
17
+ "dtype": "string",
18
+ "_type": "Value"
19
+ },
20
+ "audio": {
21
+ "sampling_rate": 16000,
22
+ "_type": "Audio"
23
+ }
24
+ },
25
+ "answer": {
26
+ "text": {
27
+ "dtype": "string",
28
+ "_type": "Value"
29
+ },
30
+ "audio": {
31
+ "sampling_rate": 16000,
32
+ "_type": "Audio"
33
+ }
34
+ },
35
+ "language": {
36
+ "dtype": "string",
37
+ "_type": "Value"
38
+ },
39
+ "audio_length": {
40
+ "dtype": "float64",
41
+ "_type": "Value"
42
+ }
43
+ },
44
+ "homepage": "",
45
+ "license": ""
46
+ }
examples/ytb_sqa_batch3_zh_en/state.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "9246b136a32aff2c",
8
+ "_format_columns": null,
9
+ "_format_kwargs": {},
10
+ "_format_type": null,
11
+ "_output_all_columns": false,
12
+ "_split": null
13
+ }
examples/{ds_name}/data-00000-of-00001.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ff81b8aa00b0cc8854ae8d977542a03c94a8da90e773a38681dfb10465e7242
3
+ size 461672
examples/{ds_name}/dataset_info.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "citation": "",
3
+ "description": "",
4
+ "features": {
5
+ "context": {
6
+ "text": {
7
+ "dtype": "string",
8
+ "_type": "Value"
9
+ },
10
+ "audio": {
11
+ "sampling_rate": 16000,
12
+ "_type": "Audio"
13
+ }
14
+ },
15
+ "instruction": {
16
+ "text": {
17
+ "dtype": "string",
18
+ "_type": "Value"
19
+ },
20
+ "audio": {
21
+ "sampling_rate": 16000,
22
+ "_type": "Audio"
23
+ }
24
+ },
25
+ "answer": {
26
+ "text": {
27
+ "dtype": "string",
28
+ "_type": "Value"
29
+ },
30
+ "audio": {
31
+ "sampling_rate": 16000,
32
+ "_type": "Audio"
33
+ }
34
+ },
35
+ "language": {
36
+ "dtype": "string",
37
+ "_type": "Value"
38
+ },
39
+ "audio_length": {
40
+ "dtype": "float64",
41
+ "_type": "Value"
42
+ }
43
+ },
44
+ "homepage": "",
45
+ "license": ""
46
+ }
examples/{ds_name}/state.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_data_files": [
3
+ {
4
+ "filename": "data-00000-of-00001.arrow"
5
+ }
6
+ ],
7
+ "_fingerprint": "2870b4932cfdd640",
8
+ "_format_columns": [
9
+ "answer",
10
+ "audio_length",
11
+ "context",
12
+ "instruction",
13
+ "language"
14
+ ],
15
+ "_format_kwargs": {},
16
+ "_format_type": null,
17
+ "_output_all_columns": false,
18
+ "_split": null
19
+ }