tuandunghcmut committed on
Commit 4196a79 · verified · parent: c7da1cb

Add files using upload-large-folder tool

This view is limited to 50 files because the commit contains too many changes.
Files changed (50):
  1. opencompass/configs/datasets/CLUE_CMRC/CLUE_CMRC_gen.py +4 -0
  2. opencompass/configs/datasets/CLUE_CMRC/CLUE_CMRC_gen_1bd3c8.py +35 -0
  3. opencompass/configs/datasets/CLUE_CMRC/CLUE_CMRC_gen_3749cd.py +33 -0
  4. opencompass/configs/datasets/CLUE_CMRC/CLUE_CMRC_gen_8484b9.py +27 -0
  5. opencompass/configs/datasets/CLUE_CMRC/CLUE_CMRC_gen_941108.py +34 -0
  6. opencompass/configs/datasets/FewCLUE_eprstmt/FewCLUE_eprstmt_gen.py +4 -0
  7. opencompass/configs/datasets/FewCLUE_eprstmt/FewCLUE_eprstmt_gen_740ea0.py +49 -0
  8. opencompass/configs/datasets/FewCLUE_eprstmt/FewCLUE_eprstmt_ppl.py +4 -0
  9. opencompass/configs/datasets/FewCLUE_eprstmt/FewCLUE_eprstmt_ppl_1ce587.py +41 -0
  10. opencompass/configs/datasets/FewCLUE_eprstmt/FewCLUE_eprstmt_ppl_f1e631.py +49 -0
  11. opencompass/configs/datasets/PJExam/PJExam_gen.py +4 -0
  12. opencompass/configs/datasets/PJExam/PJExam_gen_8cd97c.py +54 -0
  13. opencompass/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_gen.py +4 -0
  14. opencompass/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_gen_91ca53.py +44 -0
  15. opencompass/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl.py +4 -0
  16. opencompass/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl_54058d.py +34 -0
  17. opencompass/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl_5c24f1.py +45 -0
  18. opencompass/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl_9f3618.py +49 -0
  19. opencompass/configs/datasets/adv_glue/__init__.py +11 -0
  20. opencompass/configs/datasets/collections/base_core.py +20 -0
  21. opencompass/configs/datasets/collections/base_medium.py +56 -0
  22. opencompass/configs/datasets/collections/base_medium_llama.py +56 -0
  23. opencompass/configs/datasets/collections/base_small.py +38 -0
  24. opencompass/configs/datasets/collections/chat_core.py +20 -0
  25. opencompass/configs/datasets/collections/chat_medium.py +56 -0
  26. opencompass/configs/datasets/collections/chat_small.py +39 -0
  27. opencompass/configs/datasets/collections/example.py +7 -0
  28. opencompass/configs/datasets/ds1000/ds1000_compl_gen_cbc84f.py +69 -0
  29. opencompass/configs/datasets/ds1000/ds1000_compl_service_eval_gen_cbc84f.py +68 -0
  30. opencompass/configs/datasets/ds1000/ds1000_gen_5c4bec.py +84 -0
  31. opencompass/configs/datasets/ds1000/ds1000_gen_cbc84f.py +67 -0
  32. opencompass/configs/datasets/ds1000/ds1000_service_eval_gen_cbc84f.py +67 -0
  33. opencompass/configs/datasets/gaokao_math/README.md +108 -0
  34. opencompass/configs/datasets/gaokao_math/gaokao_math_gen_f5fd28.py +48 -0
  35. opencompass/configs/datasets/humaneval_cn/humaneval_cn_gen.py +4 -0
  36. opencompass/configs/datasets/humaneval_cn/humaneval_cn_gen_6313aa.py +37 -0
  37. opencompass/configs/datasets/humaneval_cn/humaneval_cn_passk_gen_6313aa.py +37 -0
  38. opencompass/configs/datasets/humaneval_cn/humaneval_cn_repeat10_gen_6313aa.py +38 -0
  39. opencompass/configs/datasets/jigsawmultilingual/jigsawmultilingual_clp.py +4 -0
  40. opencompass/configs/datasets/jigsawmultilingual/jigsawmultilingual_clp_1af0ae.py +43 -0
  41. opencompass/configs/datasets/jigsawmultilingual/jigsawmultilingual_clp_fe50d8.py +47 -0
  42. opencompass/configs/datasets/livecodebench/README.md +42 -0
  43. opencompass/configs/datasets/livecodebench/livecodebench_gen.py +4 -0
  44. opencompass/configs/datasets/livecodebench/livecodebench_gen_6966bc.py +164 -0
  45. opencompass/configs/datasets/livecodebench/livecodebench_gen_b2b0fd.py +163 -0
  46. opencompass/configs/datasets/mbpp_plus/deprecated_mbpp_plus_gen_94815c.py +64 -0
  47. opencompass/configs/datasets/mbpp_plus/mbpp_plus_gen_0b836a.py +64 -0
  48. opencompass/configs/datasets/nq_cn/nqcn_gen.py +4 -0
  49. opencompass/configs/datasets/nq_cn/nqcn_gen_141737.py +34 -0
  50. opencompass/configs/datasets/race/race_cot_gen_d95929.py +68 -0
opencompass/configs/datasets/CLUE_CMRC/CLUE_CMRC_gen.py ADDED
@@ -0,0 +1,4 @@
+ from mmengine.config import read_base
+
+ with read_base():
+     from .CLUE_CMRC_gen_1bd3c8 import CMRC_datasets  # noqa: F401, F403
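Each of these four-line `*_gen.py` stubs pins a default variant of a dataset config through mmengine's `read_base`. A minimal sketch of how such a config is consumed from a top-level evaluation config; the file name and everything except the imported `CMRC_datasets` are illustrative assumptions, not part of this commit:

```python
# Hypothetical top-level config (e.g. eval_cmrc.py).
from mmengine.config import read_base

with read_base():
    # read_base() executes the referenced config and merges its top-level
    # names (here CMRC_datasets) into this config's namespace.
    from .datasets.CLUE_CMRC.CLUE_CMRC_gen import CMRC_datasets

datasets = [*CMRC_datasets]  # the list an OpenCompass run iterates over
```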
opencompass/configs/datasets/CLUE_CMRC/CLUE_CMRC_gen_1bd3c8.py ADDED
@@ -0,0 +1,35 @@
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
+ from opencompass.openicl.icl_retriever import ZeroRetriever
+ from opencompass.openicl.icl_inferencer import GenInferencer
+ from opencompass.openicl.icl_evaluator import EMEvaluator
+ from opencompass.datasets import CMRCDataset, cmrc_postprocess
+
+ CMRC_reader_cfg = dict(
+     input_columns=['question', 'context'], output_column='answers')
+
+ CMRC_infer_cfg = dict(
+     prompt_template=dict(
+         type=PromptTemplate,
+         template=dict(round=[
+             dict(
+                 role='HUMAN',
+                 prompt='根据文章回答问题。你的答案应该尽可能简练,请以 ‘答案是’ 开头的句式作答。\n文章:{context}\n问:{question}\n答:'),
+         ])),
+     retriever=dict(type=ZeroRetriever),
+     inferencer=dict(type=GenInferencer))
+
+ CMRC_eval_cfg = dict(
+     evaluator=dict(type=EMEvaluator),
+     pred_role='BOT',
+     pred_postprocessor=dict(type=cmrc_postprocess),
+ )
+
+ CMRC_datasets = [
+     dict(
+         type=CMRCDataset,
+         abbr='CMRC_dev',
+         path='opencompass/cmrc_dev',
+         reader_cfg=CMRC_reader_cfg,
+         infer_cfg=CMRC_infer_cfg,
+         eval_cfg=CMRC_eval_cfg),
+ ]
opencompass/configs/datasets/CLUE_CMRC/CLUE_CMRC_gen_3749cd.py ADDED
@@ -0,0 +1,33 @@
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
+ from opencompass.openicl.icl_retriever import ZeroRetriever
+ from opencompass.openicl.icl_inferencer import GenInferencer
+ from opencompass.openicl.icl_evaluator import EMEvaluator
+ from opencompass.datasets import CMRCDataset
+
+ CMRC_reader_cfg = dict(
+     input_columns=['question', 'context'], output_column='answers')
+
+ CMRC_infer_cfg = dict(
+     prompt_template=dict(
+         type=PromptTemplate,
+         template=dict(round=[
+             dict(role='HUMAN', prompt='文章:{context}\n根据上文,回答如下问题:{question}'),
+             dict(role='BOT', prompt='答:'),
+         ])),
+     retriever=dict(type=ZeroRetriever),
+     inferencer=dict(type=GenInferencer))
+
+ CMRC_eval_cfg = dict(
+     evaluator=dict(type=EMEvaluator),
+     pred_role='BOT',
+ )
+
+ CMRC_datasets = [
+     dict(
+         type=CMRCDataset,
+         abbr='CMRC_dev',
+         path='opencompass/cmrc_dev',
+         reader_cfg=CMRC_reader_cfg,
+         infer_cfg=CMRC_infer_cfg,
+         eval_cfg=CMRC_eval_cfg),
+ ]
opencompass/configs/datasets/CLUE_CMRC/CLUE_CMRC_gen_8484b9.py ADDED
@@ -0,0 +1,27 @@
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
+ from opencompass.openicl.icl_retriever import ZeroRetriever
+ from opencompass.openicl.icl_inferencer import GenInferencer
+ from opencompass.openicl.icl_evaluator import EMEvaluator
+ from opencompass.datasets import CMRCDataset
+
+ CMRC_reader_cfg = dict(
+     input_columns=['question', 'context'], output_column='answers')
+
+ CMRC_infer_cfg = dict(
+     prompt_template=dict(
+         type=PromptTemplate,
+         template='文章:{context}\n根据上文,回答如下问题: {question}\n答:'),
+     retriever=dict(type=ZeroRetriever),
+     inferencer=dict(type=GenInferencer))
+
+ CMRC_eval_cfg = dict(evaluator=dict(type=EMEvaluator), )
+
+ CMRC_datasets = [
+     dict(
+         type=CMRCDataset,
+         abbr='CMRC_dev',
+         path='opencompass/cmrc_dev',
+         reader_cfg=CMRC_reader_cfg,
+         infer_cfg=CMRC_infer_cfg,
+         eval_cfg=CMRC_eval_cfg),
+ ]
opencompass/configs/datasets/CLUE_CMRC/CLUE_CMRC_gen_941108.py ADDED
@@ -0,0 +1,34 @@
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
+ from opencompass.openicl.icl_retriever import ZeroRetriever
+ from opencompass.openicl.icl_inferencer import GenInferencer
+ from opencompass.openicl.icl_evaluator import EMEvaluator
+ from opencompass.datasets import CMRCDataset
+
+ CMRC_reader_cfg = dict(
+     input_columns=['question', 'context'], output_column='answers')
+
+ CMRC_infer_cfg = dict(
+     prompt_template=dict(
+         type=PromptTemplate,
+         template=dict(round=[
+             dict(
+                 role='HUMAN',
+                 prompt='文章:{context}\n根据上文,回答如下问题:\n{question}\n答:'),
+         ])),
+     retriever=dict(type=ZeroRetriever),
+     inferencer=dict(type=GenInferencer))
+
+ CMRC_eval_cfg = dict(
+     evaluator=dict(type=EMEvaluator),
+     pred_role='BOT',
+ )
+
+ CMRC_datasets = [
+     dict(
+         type=CMRCDataset,
+         abbr='CMRC_dev',
+         path='opencompass/cmrc_dev',
+         reader_cfg=CMRC_reader_cfg,
+         infer_cfg=CMRC_infer_cfg,
+         eval_cfg=CMRC_eval_cfg),
+ ]
opencompass/configs/datasets/FewCLUE_eprstmt/FewCLUE_eprstmt_gen.py ADDED
@@ -0,0 +1,4 @@
+ from mmengine.config import read_base
+
+ with read_base():
+     from .FewCLUE_eprstmt_gen_740ea0 import eprstmt_datasets  # noqa: F401, F403
opencompass/configs/datasets/FewCLUE_eprstmt/FewCLUE_eprstmt_gen_740ea0.py ADDED
@@ -0,0 +1,49 @@
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
+ from opencompass.openicl.icl_retriever import ZeroRetriever
+ from opencompass.openicl.icl_inferencer import GenInferencer
+ from opencompass.openicl.icl_evaluator import AccEvaluator
+ from opencompass.datasets import EprstmtDatasetV2
+ from opencompass.utils.text_postprocessors import first_capital_postprocess
+
+ eprstmt_reader_cfg = dict(
+     input_columns=['sentence'], output_column='label', test_split='train')
+
+ eprstmt_infer_cfg = dict(
+     prompt_template=dict(
+         type=PromptTemplate,
+         template=dict(round=[
+             dict(
+                 role='HUMAN',
+                 prompt='内容: "{sentence}"。请对上述内容进行情绪分类。\nA. 积极\nB. 消极\n请从”A“,”B“中进行选择。\n答:'
+             ),
+         ]),
+     ),
+     retriever=dict(type=ZeroRetriever),
+     inferencer=dict(type=GenInferencer),
+ )
+
+ eprstmt_eval_cfg = dict(
+     evaluator=dict(type=AccEvaluator),
+     pred_role='BOT',
+     pred_postprocessor=dict(type=first_capital_postprocess),
+ )
+
+ eprstmt_datasets = [
+     dict(
+         abbr='eprstmt-dev',
+         type=EprstmtDatasetV2,
+         path='./data/FewCLUE/eprstmt/dev_few_all.json',
+         reader_cfg=eprstmt_reader_cfg,
+         infer_cfg=eprstmt_infer_cfg,
+         eval_cfg=eprstmt_eval_cfg,
+     ),
+     dict(
+         abbr='eprstmt-test',
+         type=EprstmtDatasetV2,
+         path='./data/FewCLUE/eprstmt/test_public.json',
+         reader_cfg=eprstmt_reader_cfg,
+         infer_cfg=eprstmt_infer_cfg,
+         eval_cfg=eprstmt_eval_cfg,
+     ),
+ ]
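The `gen` variant above relies on `first_capital_postprocess` to reduce a free-form generation to a single option letter before accuracy is computed. A hedged illustration of that behavior; the real implementation lives in `opencompass.utils.text_postprocessors` and may differ in detail:

```python
import re

def first_capital(text: str) -> str:
    # Return the first capital letter found in the text, or '' if none.
    match = re.search(r'[A-Z]', text)
    return match.group(0) if match else ''

# e.g. a model answering the eprstmt prompt above:
assert first_capital('答:B. 消极') == 'B'
```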
opencompass/configs/datasets/FewCLUE_eprstmt/FewCLUE_eprstmt_ppl.py ADDED
@@ -0,0 +1,4 @@
+ from mmengine.config import read_base
+
+ with read_base():
+     from .FewCLUE_eprstmt_ppl_f1e631 import eprstmt_datasets  # noqa: F401, F403
opencompass/configs/datasets/FewCLUE_eprstmt/FewCLUE_eprstmt_ppl_1ce587.py ADDED
@@ -0,0 +1,41 @@
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
+ from opencompass.openicl.icl_retriever import ZeroRetriever
+ from opencompass.openicl.icl_inferencer import PPLInferencer
+ from opencompass.openicl.icl_evaluator import AccEvaluator
+ from opencompass.datasets import HFDataset
+
+ eprstmt_reader_cfg = dict(
+     input_columns=['sentence'], output_column='label', test_split='train')
+
+ eprstmt_infer_cfg = dict(
+     prompt_template=dict(
+         type=PromptTemplate,
+         template={
+             'Negative': ' 内容: "{sentence}"。情绪分类:消极。',
+             'Positive': ' 内容: "{sentence}"。情绪分类:积极。',
+         }),
+     retriever=dict(type=ZeroRetriever),
+     inferencer=dict(type=PPLInferencer))
+
+ eprstmt_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+ eprstmt_datasets = [
+     dict(
+         type=HFDataset,
+         abbr='eprstmt-dev',
+         path='json',
+         data_files='./data/FewCLUE/eprstmt/dev_few_all.json',
+         split='train',
+         reader_cfg=eprstmt_reader_cfg,
+         infer_cfg=eprstmt_infer_cfg,
+         eval_cfg=eprstmt_eval_cfg),
+     dict(
+         type=HFDataset,
+         abbr='eprstmt-test',
+         path='json',
+         data_files='./data/FewCLUE/eprstmt/test_public.json',
+         split='train',
+         reader_cfg=eprstmt_reader_cfg,
+         infer_cfg=eprstmt_infer_cfg,
+         eval_cfg=eprstmt_eval_cfg)
+ ]
opencompass/configs/datasets/FewCLUE_eprstmt/FewCLUE_eprstmt_ppl_f1e631.py ADDED
@@ -0,0 +1,49 @@
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
+ from opencompass.openicl.icl_retriever import ZeroRetriever
+ from opencompass.openicl.icl_inferencer import PPLInferencer
+ from opencompass.openicl.icl_evaluator import AccEvaluator
+ from opencompass.datasets import HFDataset
+
+ eprstmt_reader_cfg = dict(
+     input_columns=['sentence'], output_column='label', test_split='train')
+
+ eprstmt_infer_cfg = dict(
+     prompt_template=dict(
+         type=PromptTemplate,
+         template={
+             'Negative':
+             dict(round=[
+                 dict(role='HUMAN', prompt='内容: "{sentence}"。情绪分类:'),
+                 dict(role='BOT', prompt='消极。')
+             ]),
+             'Positive':
+             dict(round=[
+                 dict(role='HUMAN', prompt='内容: "{sentence}"。情绪分类:'),
+                 dict(role='BOT', prompt='积极。')
+             ]),
+         }),
+     retriever=dict(type=ZeroRetriever),
+     inferencer=dict(type=PPLInferencer))
+
+ eprstmt_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+ eprstmt_datasets = [
+     dict(
+         type=HFDataset,
+         abbr='eprstmt-dev',
+         path='json',
+         data_files='./data/FewCLUE/eprstmt/dev_few_all.json',
+         split='train',
+         reader_cfg=eprstmt_reader_cfg,
+         infer_cfg=eprstmt_infer_cfg,
+         eval_cfg=eprstmt_eval_cfg),
+     dict(
+         type=HFDataset,
+         abbr='eprstmt-test',
+         path='json',
+         data_files='./data/FewCLUE/eprstmt/test_public.json',
+         split='train',
+         reader_cfg=eprstmt_reader_cfg,
+         infer_cfg=eprstmt_infer_cfg,
+         eval_cfg=eprstmt_eval_cfg)
+ ]
opencompass/configs/datasets/PJExam/PJExam_gen.py ADDED
@@ -0,0 +1,4 @@
+ from mmengine.config import read_base
+
+ with read_base():
+     from .PJExam_gen_8cd97c import PJExam_datasets  # noqa: F401, F403
opencompass/configs/datasets/PJExam/PJExam_gen_8cd97c.py ADDED
@@ -0,0 +1,54 @@
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
+ from opencompass.openicl.icl_retriever import ZeroRetriever
+ from opencompass.openicl.icl_inferencer import GenInferencer
+ from opencompass.datasets import PJExamDataset, PJExamEvaluator
+
+ PJExam_datasets = []
+ for _name in [
+         'gk-2022-v1', 'gk-2022-v1-math', 'gk-2023-v1', 'gk-2023-v1-math',
+         'gk-2023-v2', 'gk-2023-v2-math', 'zk-2022-v1'
+ ]:
+     _hint = '请你做一道</major>选择题\n请你一步一步思考并将思考过程写在【解析】和<eoe>之间。你将从A,B,C,D中选出正确的答案,并写在【答案】和<eoa>之间。\n例如:【答案】A<eoa>\n完整的题目回答的格式如下:\n【解析】...<eoe>\n【答案】...<eoa>\n请你严格按照上述格式作答。\n题目如下:\n'
+     _reader_cfg = {
+         'input_columns': ['question'],
+         'output_column': 'std_ans',
+     }
+     _infer_cfg = {
+         'ice_template': {
+             'type': PromptTemplate,
+             'template': {
+                 'round': [{
+                     'role': 'HUMAN',
+                     'prompt': _hint + '{question}',
+                 }]
+             },
+             'ice_token': '</E>'
+         },
+         'retriever': {
+             'type': ZeroRetriever
+         },
+         'inferencer': {
+             'type': GenInferencer,
+             'max_out_len': 1024,
+         }
+     }
+     _eval_cfg = {
+         'evaluator': {
+             'type': PJExamEvaluator
+         },
+         'pred_role': 'BOT',
+         'ds_column': 'eval_infos'
+     }
+     _dataset = {
+         'type': PJExamDataset,
+         'abbr': 'PJExamDataset-' + _name,
+         'path': './data/PJExam',
+         'name': _name,
+         'reader_cfg': _reader_cfg,
+         'infer_cfg': _infer_cfg,
+         'eval_cfg': _eval_cfg,
+     }
+
+     PJExam_datasets.append(_dataset)
+
+ del _name, _hint, _reader_cfg, _infer_cfg, _eval_cfg, _dataset
opencompass/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_gen.py ADDED
@@ -0,0 +1,4 @@
+ from mmengine.config import read_base
+
+ with read_base():
+     from .SuperGLUE_COPA_gen_91ca53 import COPA_datasets  # noqa: F401, F403
opencompass/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_gen_91ca53.py ADDED
@@ -0,0 +1,44 @@
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
+ from opencompass.openicl.icl_retriever import ZeroRetriever
+ from opencompass.openicl.icl_inferencer import GenInferencer
+ from opencompass.openicl.icl_evaluator import AccEvaluator
+ from opencompass.datasets import COPADatasetV2
+ from opencompass.utils.text_postprocessors import first_option_postprocess
+
+ COPA_reader_cfg = dict(
+     input_columns=['question', 'premise', 'choice1', 'choice2'],
+     output_column='label',
+ )
+
+ COPA_infer_cfg = dict(
+     prompt_template=dict(
+         type=PromptTemplate,
+         template=dict(
+             round=[
+                 dict(
+                     role='HUMAN',
+                     prompt='{premise}\nQuestion: Which may be the {question}?\nA. {choice1}\nB. {choice2}\nAnswer:'
+                 ),
+             ], ),
+     ),
+     retriever=dict(type=ZeroRetriever),
+     inferencer=dict(type=GenInferencer),
+ )
+
+ COPA_eval_cfg = dict(
+     evaluator=dict(type=AccEvaluator),
+     pred_role='BOT',
+     pred_postprocessor=dict(type=first_option_postprocess, options='AB'),
+ )
+
+ COPA_datasets = [
+     dict(
+         abbr='COPA',
+         type=COPADatasetV2,
+         path='./data/SuperGLUE/COPA/val.jsonl',
+         reader_cfg=COPA_reader_cfg,
+         infer_cfg=COPA_infer_cfg,
+         eval_cfg=COPA_eval_cfg,
+     )
+ ]
opencompass/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl.py ADDED
@@ -0,0 +1,4 @@
+ from mmengine.config import read_base
+
+ with read_base():
+     from .SuperGLUE_COPA_ppl_9f3618 import COPA_datasets  # noqa: F401, F403
opencompass/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl_54058d.py ADDED
@@ -0,0 +1,34 @@
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
+ from opencompass.openicl.icl_retriever import ZeroRetriever
+ from opencompass.openicl.icl_inferencer import PPLInferencer
+ from opencompass.openicl.icl_evaluator import AccEvaluator
+ from opencompass.datasets import HFDataset
+
+ COPA_reader_cfg = dict(
+     input_columns=['question', 'premise', 'choice1', 'choice2'],
+     output_column='label',
+     test_split='train')
+
+ COPA_infer_cfg = dict(
+     prompt_template=dict(
+         type=PromptTemplate,
+         template={
+             0: 'Premise:{premise}。\nQuestion:{question}。\nAnswer: {choice1}.',
+             1: 'Passage:{premise}。\nQuestion:{question}。\nAnswer: {choice2}.',
+         }),
+     retriever=dict(type=ZeroRetriever),
+     inferencer=dict(type=PPLInferencer))
+
+ COPA_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+ COPA_datasets = [
+     dict(
+         type=HFDataset,
+         abbr='COPA',
+         path='json',
+         data_files='./data/SuperGLUE/COPA/val.jsonl',
+         split='train',
+         reader_cfg=COPA_reader_cfg,
+         infer_cfg=COPA_infer_cfg,
+         eval_cfg=COPA_eval_cfg)
+ ]
opencompass/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl_5c24f1.py ADDED
@@ -0,0 +1,45 @@
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
+ from opencompass.openicl.icl_retriever import ZeroRetriever
+ from opencompass.openicl.icl_inferencer import PPLInferencer
+ from opencompass.openicl.icl_evaluator import AccEvaluator
+ from opencompass.datasets import HFDataset
+
+ COPA_reader_cfg = dict(
+     input_columns=['question', 'premise', 'choice1', 'choice2'],
+     output_column='label',
+     test_split='train')
+
+ COPA_infer_cfg = dict(
+     prompt_template=dict(
+         type=PromptTemplate,
+         template={
+             0:
+             dict(round=[
+                 dict(role='HUMAN', prompt='{premise}\nQuestion: {question}\nAnswer:'),
+                 dict(role='BOT', prompt='{choice1}'),
+             ]),
+             1:
+             dict(round=[
+                 dict(role='HUMAN', prompt='{premise}\nQuestion: {question}\nAnswer:'),
+                 dict(role='BOT', prompt='{choice2}'),
+             ]),
+         },
+     ),
+     retriever=dict(type=ZeroRetriever),
+     inferencer=dict(type=PPLInferencer),
+ )
+
+ COPA_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+ COPA_datasets = [
+     dict(
+         type=HFDataset,
+         abbr='COPA',
+         path='json',
+         data_files='./data/SuperGLUE/COPA/val.jsonl',
+         split='train',
+         reader_cfg=COPA_reader_cfg,
+         infer_cfg=COPA_infer_cfg,
+         eval_cfg=COPA_eval_cfg,
+     )
+ ]
opencompass/configs/datasets/SuperGLUE_COPA/SuperGLUE_COPA_ppl_9f3618.py ADDED
@@ -0,0 +1,49 @@
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
+ from opencompass.openicl.icl_retriever import ZeroRetriever
+ from opencompass.openicl.icl_inferencer import PPLInferencer
+ from opencompass.openicl.icl_evaluator import AccEvaluator
+ from opencompass.datasets import HFDataset
+
+ COPA_reader_cfg = dict(
+     input_columns=['question', 'premise', 'choice1', 'choice2'],
+     output_column='label',
+     test_split='train')
+
+ COPA_infer_cfg = dict(
+     prompt_template=dict(
+         type=PromptTemplate,
+         template={
+             0:
+             dict(round=[
+                 dict(
+                     role='HUMAN',
+                     prompt='{premise}\nQuestion: What may be the {question}?\nAnswer:'),
+                 dict(role='BOT', prompt='{choice1}'),
+             ]),
+             1:
+             dict(round=[
+                 dict(
+                     role='HUMAN',
+                     prompt='{premise}\nQuestion: What may be the {question}?\nAnswer:'),
+                 dict(role='BOT', prompt='{choice2}'),
+             ]),
+         },
+     ),
+     retriever=dict(type=ZeroRetriever),
+     inferencer=dict(type=PPLInferencer),
+ )
+
+ COPA_eval_cfg = dict(evaluator=dict(type=AccEvaluator))
+
+ COPA_datasets = [
+     dict(
+         type=HFDataset,
+         abbr='COPA',
+         path='json',
+         data_files='./data/SuperGLUE/COPA/val.jsonl',
+         split='train',
+         reader_cfg=COPA_reader_cfg,
+         infer_cfg=COPA_infer_cfg,
+         eval_cfg=COPA_eval_cfg,
+     )
+ ]
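Unlike the `gen` variants, these `ppl` configs hand the `PPLInferencer` one complete prompt per candidate label (the template keys `0` and `1`), and the label whose completion scores the lowest perplexity becomes the prediction. An illustrative sketch of that selection rule, not OpenCompass internals:

```python
import math

def pick_label(loss_by_label: dict) -> int:
    # Given a mean negative log-likelihood per label, choose the label
    # whose completion has the lowest perplexity (exp of the loss).
    return min(loss_by_label, key=lambda k: math.exp(loss_by_label[k]))

assert pick_label({0: 0.7, 1: 1.2}) == 0  # the choice1 continuation wins
```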
opencompass/configs/datasets/adv_glue/__init__.py ADDED
@@ -0,0 +1,11 @@
+ from mmengine.config import read_base
+
+ with read_base():
+     from .adv_glue_sst2.adv_glue_sst2_gen import adv_sst2_datasets
+     from .adv_glue_qqp.adv_glue_qqp_gen import adv_qqp_datasets
+     from .adv_glue_rte.adv_glue_rte_gen import adv_rte_datasets
+     from .adv_glue_qnli.adv_glue_qnli_gen import adv_qnli_datasets
+     from .adv_glue_mnli.adv_glue_mnli_gen import adv_mnli_datasets
+     from .adv_glue_mnli_mm.adv_glue_mnli_mm_gen import adv_mnli_mm_datasets
+
+ datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
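The closing `datasets = sum(...)` line, repeated in every collection file in this commit, flattens all imported `*_datasets` lists into a single list. A self-contained sketch of the same idiom with two stand-in dataset lists:

```python
# Any module-level name ending in `_datasets` is swept up automatically,
# so extending a collection is just one more read_base() import.
adv_sst2_datasets = [dict(abbr='adv_sst2')]
adv_qqp_datasets = [dict(abbr='adv_qqp')]

datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
assert [d['abbr'] for d in datasets] == ['adv_sst2', 'adv_qqp']
```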
opencompass/configs/datasets/collections/base_core.py ADDED
@@ -0,0 +1,20 @@
+ from mmengine.config import read_base
+
+ with read_base():
+     from ..mmlu.mmlu_ppl_ac766d import mmlu_datasets
+     from ..cmmlu.cmmlu_ppl_041cbf import cmmlu_datasets
+     from ..ceval.ceval_ppl_1cd8bf import ceval_datasets
+     from ..GaokaoBench.GaokaoBench_no_subjective_gen_d21e37 import GaokaoBench_datasets
+     from ..triviaqa.triviaqa_wiki_1shot_gen_20a989 import triviaqa_datasets
+     from ..nq.nq_open_1shot_gen_20a989 import nq_datasets
+     from ..race.race_ppl_abed12 import race_datasets
+     from ..winogrande.winogrande_5shot_ll_252f01 import winogrande_datasets
+     from ..hellaswag.hellaswag_10shot_ppl_59c85e import hellaswag_datasets
+     from ..bbh.bbh_gen_98fba6 import bbh_datasets
+     from ..gsm8k.gsm8k_gen_ee684f import gsm8k_datasets
+     from ..math.math_evaluatorv2_gen_2f4a71 import math_datasets
+     from ..TheoremQA.TheoremQA_post_v2_gen_2c2583 import TheoremQA_datasets
+     from ..humaneval.deprecated_humaneval_gen_d2537e import humaneval_datasets
+     from ..mbpp.deprecated_sanitized_mbpp_gen_cb43ef import sanitized_mbpp_datasets
+
+ datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
opencompass/configs/datasets/collections/base_medium.py ADDED
@@ -0,0 +1,56 @@
+ from mmengine.config import read_base
+
+ with read_base():
+     from ..mmlu.mmlu_ppl_ac766d import mmlu_datasets
+     from ..ceval.ceval_ppl_578f8d import ceval_datasets
+     from ..agieval.agieval_mixed_713d14 import agieval_datasets
+     from ..GaokaoBench.GaokaoBench_mixed_9af5ee import GaokaoBench_datasets
+     from ..bbh.bbh_gen_5b92b0 import bbh_datasets
+     from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
+     from ..mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
+     from ..CLUE_C3.CLUE_C3_ppl_e24a31 import C3_datasets
+     from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
+     from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
+     from ..CLUE_afqmc.CLUE_afqmc_ppl_6507d7 import afqmc_datasets
+     from ..CLUE_cmnli.CLUE_cmnli_ppl_fdc6de import cmnli_datasets
+     from ..CLUE_ocnli.CLUE_ocnli_ppl_fdc6de import ocnli_datasets
+     from ..FewCLUE_bustm.FewCLUE_bustm_ppl_e53034 import bustm_datasets
+     from ..FewCLUE_chid.FewCLUE_chid_ppl_8f2872 import chid_datasets
+     from ..FewCLUE_cluewsc.FewCLUE_cluewsc_ppl_4284a0 import cluewsc_datasets
+     from ..FewCLUE_csl.FewCLUE_csl_ppl_841b62 import csl_datasets
+     from ..FewCLUE_eprstmt.FewCLUE_eprstmt_ppl_f1e631 import eprstmt_datasets
+     from ..FewCLUE_ocnli_fc.FewCLUE_ocnli_fc_ppl_c08300 import ocnli_fc_datasets
+     from ..FewCLUE_tnews.FewCLUE_tnews_ppl_d10e8a import tnews_datasets
+     from ..lcsts.lcsts_gen_8ee1fe import lcsts_datasets
+     from ..lambada.lambada_gen_217e11 import lambada_datasets
+     from ..storycloze.storycloze_ppl_496661 import storycloze_datasets
+     from ..SuperGLUE_AX_b.SuperGLUE_AX_b_ppl_6db806 import AX_b_datasets
+     from ..SuperGLUE_AX_g.SuperGLUE_AX_g_ppl_66caf3 import AX_g_datasets
+     from ..SuperGLUE_BoolQ.SuperGLUE_BoolQ_ppl_314b96 import BoolQ_datasets
+     from ..SuperGLUE_CB.SuperGLUE_CB_ppl_0143fe import CB_datasets
+     from ..SuperGLUE_COPA.SuperGLUE_COPA_ppl_9f3618 import COPA_datasets
+     from ..SuperGLUE_MultiRC.SuperGLUE_MultiRC_ppl_ced824 import MultiRC_datasets
+     from ..SuperGLUE_RTE.SuperGLUE_RTE_ppl_66caf3 import RTE_datasets
+     from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_30dea0 import ReCoRD_datasets
+     from ..SuperGLUE_WiC.SuperGLUE_WiC_ppl_312de9 import WiC_datasets
+     from ..SuperGLUE_WSC.SuperGLUE_WSC_ppl_003529 import WSC_datasets
+     from ..race.race_ppl_a138cd import race_datasets
+     from ..Xsum.Xsum_gen_31397e import Xsum_datasets
+     from ..gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
+     from ..summedits.summedits_ppl_1fbeb6 import summedits_datasets
+     from ..math.math_gen_265cce import math_datasets
+     from ..TheoremQA.TheoremQA_gen_ef26ca import TheoremQA_datasets
+     from ..hellaswag.hellaswag_ppl_47bff9 import hellaswag_datasets
+     from ..ARC_e.ARC_e_ppl_a450bd import ARC_e_datasets
+     from ..ARC_c.ARC_c_ppl_a450bd import ARC_c_datasets
+     from ..commonsenseqa.commonsenseqa_ppl_5545e2 import commonsenseqa_datasets
+     from ..piqa.piqa_ppl_1cf9f0 import piqa_datasets
+     from ..siqa.siqa_ppl_ced5f6 import siqa_datasets
+     from ..strategyqa.strategyqa_gen_1180a7 import strategyqa_datasets
+     from ..winogrande.winogrande_ll_c5cf57 import winogrande_datasets
+     from ..obqa.obqa_ppl_c7c154 import obqa_datasets
+     from ..nq.nq_gen_c788f6 import nq_datasets
+     from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
+     from ..flores.flores_gen_806ede import flores_datasets
+
+ datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
opencompass/configs/datasets/collections/base_medium_llama.py ADDED
@@ -0,0 +1,56 @@
+ from mmengine.config import read_base
+
+ with read_base():
+     from ..mmlu.mmlu_ppl_ac766d import mmlu_datasets
+     from ..ceval.ceval_ppl_578f8d import ceval_datasets
+     from ..agieval.agieval_mixed_713d14 import agieval_datasets
+     from ..GaokaoBench.GaokaoBench_mixed_9af5ee import GaokaoBench_datasets
+     from ..bbh.bbh_gen_5b92b0 import bbh_datasets
+     from ..humaneval.deprecated_humaneval_gen_a82cae import humaneval_datasets
+     from ..mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
+     from ..CLUE_C3.CLUE_C3_ppl_e24a31 import C3_datasets
+     from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
+     from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
+     from ..CLUE_afqmc.CLUE_afqmc_ppl_6507d7 import afqmc_datasets
+     from ..CLUE_cmnli.CLUE_cmnli_ppl_fdc6de import cmnli_datasets
+     from ..CLUE_ocnli.CLUE_ocnli_ppl_fdc6de import ocnli_datasets
+     from ..FewCLUE_bustm.FewCLUE_bustm_ppl_e53034 import bustm_datasets
+     from ..FewCLUE_chid.FewCLUE_chid_ppl_8f2872 import chid_datasets
+     from ..FewCLUE_cluewsc.FewCLUE_cluewsc_ppl_4284a0 import cluewsc_datasets
+     from ..FewCLUE_csl.FewCLUE_csl_ppl_841b62 import csl_datasets
+     from ..FewCLUE_eprstmt.FewCLUE_eprstmt_ppl_f1e631 import eprstmt_datasets
+     from ..FewCLUE_ocnli_fc.FewCLUE_ocnli_fc_ppl_c08300 import ocnli_fc_datasets
+     from ..FewCLUE_tnews.FewCLUE_tnews_ppl_d10e8a import tnews_datasets
+     from ..lcsts.lcsts_gen_8ee1fe import lcsts_datasets
+     from ..lambada.lambada_gen_217e11 import lambada_datasets
+     from ..storycloze.storycloze_ppl_496661 import storycloze_datasets
+     from ..SuperGLUE_AX_b.SuperGLUE_AX_b_ppl_6db806 import AX_b_datasets
+     from ..SuperGLUE_AX_g.SuperGLUE_AX_g_ppl_66caf3 import AX_g_datasets
+     from ..SuperGLUE_BoolQ.SuperGLUE_BoolQ_ppl_314797 import BoolQ_datasets
+     from ..SuperGLUE_CB.SuperGLUE_CB_ppl_0143fe import CB_datasets
+     from ..SuperGLUE_COPA.SuperGLUE_COPA_ppl_9f3618 import COPA_datasets
+     from ..SuperGLUE_MultiRC.SuperGLUE_MultiRC_ppl_ced824 import MultiRC_datasets
+     from ..SuperGLUE_RTE.SuperGLUE_RTE_ppl_66caf3 import RTE_datasets
+     from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_30dea0 import ReCoRD_datasets
+     from ..SuperGLUE_WiC.SuperGLUE_WiC_ppl_312de9 import WiC_datasets
+     from ..SuperGLUE_WSC.SuperGLUE_WSC_ppl_003529 import WSC_datasets
+     from ..race.race_ppl_5831a0 import race_datasets
+     from ..Xsum.Xsum_gen_31397e import Xsum_datasets
+     from ..gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
+     from ..summedits.summedits_ppl_1fbeb6 import summedits_datasets
+     from ..math.math_gen_265cce import math_datasets
+     from ..TheoremQA.TheoremQA_gen_ef26ca import TheoremQA_datasets
+     from ..hellaswag.hellaswag_ppl_a6e128 import hellaswag_datasets
+     from ..ARC_e.ARC_e_ppl_2ef631 import ARC_e_datasets
+     from ..ARC_c.ARC_c_ppl_2ef631 import ARC_c_datasets
+     from ..commonsenseqa.commonsenseqa_ppl_5545e2 import commonsenseqa_datasets
+     from ..piqa.piqa_ppl_0cfff2 import piqa_datasets
+     from ..siqa.siqa_ppl_e8d8c5 import siqa_datasets
+     from ..strategyqa.strategyqa_gen_1180a7 import strategyqa_datasets
+     from ..winogrande.winogrande_ll_c5cf57 import winogrande_datasets
+     from ..obqa.obqa_ppl_6aac9e import obqa_datasets
+     from ..nq.nq_gen_0356ec import nq_datasets
+     from ..triviaqa.triviaqa_gen_0356ec import triviaqa_datasets
+     from ..flores.flores_gen_806ede import flores_datasets
+
+ datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
opencompass/configs/datasets/collections/base_small.py ADDED
@@ -0,0 +1,38 @@
+ from mmengine.config import read_base
+
+ with read_base():
+     from ..ceval.ceval_ppl_578f8d import ceval_datasets
+     from ..bbh.bbh_gen_5b92b0 import bbh_datasets
+     from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
+     from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
+     from ..CLUE_afqmc.CLUE_afqmc_ppl_6507d7 import afqmc_datasets
+     from ..FewCLUE_bustm.FewCLUE_bustm_ppl_e53034 import bustm_datasets
+     from ..FewCLUE_chid.FewCLUE_chid_ppl_8f2872 import chid_datasets
+     from ..FewCLUE_cluewsc.FewCLUE_cluewsc_ppl_868415 import cluewsc_datasets
+     from ..FewCLUE_eprstmt.FewCLUE_eprstmt_ppl_f1e631 import eprstmt_datasets
+     from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
+     from ..mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
+     from ..lambada.lambada_gen_217e11 import lambada_datasets
+     from ..storycloze.storycloze_ppl_496661 import storycloze_datasets
+     from ..SuperGLUE_AX_b.SuperGLUE_AX_b_ppl_6db806 import AX_b_datasets
+     from ..SuperGLUE_AX_g.SuperGLUE_AX_g_ppl_66caf3 import AX_g_datasets
+     from ..SuperGLUE_BoolQ.SuperGLUE_BoolQ_ppl_314b96 import BoolQ_datasets
+     from ..SuperGLUE_CB.SuperGLUE_CB_ppl_0143fe import CB_datasets
+     from ..SuperGLUE_COPA.SuperGLUE_COPA_ppl_9f3618 import COPA_datasets
+     from ..SuperGLUE_MultiRC.SuperGLUE_MultiRC_ppl_ced824 import MultiRC_datasets
+     from ..SuperGLUE_RTE.SuperGLUE_RTE_ppl_66caf3 import RTE_datasets
+     from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_30dea0 import ReCoRD_datasets
+     from ..SuperGLUE_WiC.SuperGLUE_WiC_ppl_312de9 import WiC_datasets
+     from ..SuperGLUE_WSC.SuperGLUE_WSC_ppl_d0f531 import WSC_datasets
+     from ..race.race_ppl_a138cd import race_datasets
+     from ..math.math_gen_265cce import math_datasets
+     from ..gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
+     from ..summedits.summedits_ppl_1fbeb6 import summedits_datasets
+     from ..hellaswag.hellaswag_ppl_47bff9 import hellaswag_datasets
+     from ..piqa.piqa_ppl_1cf9f0 import piqa_datasets
+     from ..winogrande.winogrande_ll_c5cf57 import winogrande_datasets
+     from ..obqa.obqa_ppl_c7c154 import obqa_datasets
+     from ..nq.nq_gen_c788f6 import nq_datasets
+     from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
+
+ datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
opencompass/configs/datasets/collections/chat_core.py ADDED
@@ -0,0 +1,20 @@
+ from mmengine.config import read_base
+
+ with read_base():
+     from ..mmlu.mmlu_gen_4d595a import mmlu_datasets
+     from ..cmmlu.cmmlu_gen_c13365 import cmmlu_datasets
+     from ..ceval.ceval_internal_gen_2daf24 import ceval_datasets
+     from ..GaokaoBench.GaokaoBench_no_subjective_gen_4c31db import GaokaoBench_datasets
+     from ..triviaqa.triviaqa_wiki_1shot_gen_bc5f21 import triviaqa_datasets
+     from ..nq.nq_open_1shot_gen_2e45e5 import nq_datasets
+     from ..race.race_gen_69ee4f import race_datasets
+     from ..winogrande.winogrande_5shot_gen_6447e6 import winogrande_datasets
+     from ..hellaswag.hellaswag_10shot_gen_e42710 import hellaswag_datasets
+     from ..bbh.bbh_gen_5b92b0 import bbh_datasets
+     from ..gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
+     from ..math.math_evaluatorv2_gen_cecb31 import math_datasets
+     from ..TheoremQA.TheoremQA_post_v2_gen_ef26ca import TheoremQA_datasets
+     from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
+     from ..mbpp.deprecated_sanitized_mbpp_gen_1e1056 import sanitized_mbpp_datasets
+
+ datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
opencompass/configs/datasets/collections/chat_medium.py ADDED
@@ -0,0 +1,56 @@
+ from mmengine.config import read_base
+
+ with read_base():
+     from ..mmlu.mmlu_gen_4d595a import mmlu_datasets
+     from ..ceval.ceval_gen_5f30c7 import ceval_datasets
+     from ..agieval.agieval_gen_64afd3 import agieval_datasets
+     from ..GaokaoBench.GaokaoBench_gen_5cfe9e import GaokaoBench_datasets
+     from ..bbh.bbh_gen_5b92b0 import bbh_datasets
+     from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
+     from ..mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
+     from ..CLUE_C3.CLUE_C3_gen_8c358f import C3_datasets
+     from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
+     from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
+     from ..CLUE_afqmc.CLUE_afqmc_gen_901306 import afqmc_datasets
+     from ..CLUE_cmnli.CLUE_cmnli_gen_1abf97 import cmnli_datasets
+     from ..CLUE_ocnli.CLUE_ocnli_gen_c4cb6c import ocnli_datasets
+     from ..FewCLUE_bustm.FewCLUE_bustm_gen_634f41 import bustm_datasets
+     from ..FewCLUE_chid.FewCLUE_chid_gen_0a29a2 import chid_datasets
+     from ..FewCLUE_cluewsc.FewCLUE_cluewsc_gen_c68933 import cluewsc_datasets
+     from ..FewCLUE_csl.FewCLUE_csl_gen_28b223 import csl_datasets
+     from ..FewCLUE_eprstmt.FewCLUE_eprstmt_gen_740ea0 import eprstmt_datasets
+     from ..FewCLUE_ocnli_fc.FewCLUE_ocnli_fc_gen_f97a97 import ocnli_fc_datasets
+     from ..FewCLUE_tnews.FewCLUE_tnews_gen_b90e4a import tnews_datasets
+     from ..lcsts.lcsts_gen_8ee1fe import lcsts_datasets
+     from ..lambada.lambada_gen_217e11 import lambada_datasets
+     from ..storycloze.storycloze_gen_7f656a import storycloze_datasets
+     from ..SuperGLUE_AX_b.SuperGLUE_AX_b_gen_4dfefa import AX_b_datasets
+     from ..SuperGLUE_AX_g.SuperGLUE_AX_g_gen_68aac7 import AX_g_datasets
+     from ..SuperGLUE_BoolQ.SuperGLUE_BoolQ_gen_883d50 import BoolQ_datasets
+     from ..SuperGLUE_CB.SuperGLUE_CB_gen_854c6c import CB_datasets
+     from ..SuperGLUE_COPA.SuperGLUE_COPA_gen_91ca53 import COPA_datasets
+     from ..SuperGLUE_MultiRC.SuperGLUE_MultiRC_gen_27071f import MultiRC_datasets
+     from ..SuperGLUE_RTE.SuperGLUE_RTE_gen_68aac7 import RTE_datasets
+     from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_30dea0 import ReCoRD_datasets
+     from ..SuperGLUE_WiC.SuperGLUE_WiC_gen_d06864 import WiC_datasets
+     from ..SuperGLUE_WSC.SuperGLUE_WSC_gen_7902a7 import WSC_datasets
+     from ..race.race_gen_69ee4f import race_datasets
+     from ..Xsum.Xsum_gen_31397e import Xsum_datasets
+     from ..gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
+     from ..summedits.summedits_gen_315438 import summedits_datasets
+     from ..math.math_gen_265cce import math_datasets
+     from ..TheoremQA.TheoremQA_gen_7009de import TheoremQA_datasets
+     from ..hellaswag.hellaswag_gen_6faab5 import hellaswag_datasets
+     from ..ARC_e.ARC_e_gen_1e0de5 import ARC_e_datasets
+     from ..ARC_c.ARC_c_gen_1e0de5 import ARC_c_datasets
+     from ..commonsenseqa.commonsenseqa_gen_c946f2 import commonsenseqa_datasets
+     from ..piqa.piqa_gen_1194eb import piqa_datasets
+     from ..siqa.siqa_gen_e78df3 import siqa_datasets
+     from ..strategyqa.strategyqa_gen_1180a7 import strategyqa_datasets
+     from ..winogrande.deprecated_winogrande_gen_a9ede5 import winogrande_datasets
+     from ..obqa.obqa_gen_9069e4 import obqa_datasets
+     from ..nq.nq_gen_c788f6 import nq_datasets
+     from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
+     from ..flores.flores_gen_806ede import flores_datasets
+
+ datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
opencompass/configs/datasets/collections/chat_small.py ADDED
@@ -0,0 +1,39 @@
+ from mmengine.config import read_base
+
+ with read_base():
+     from ..mmlu.mmlu_gen_4d595a import mmlu_datasets
+     from ..ceval.ceval_gen_5f30c7 import ceval_datasets
+     from ..bbh.bbh_gen_5b92b0 import bbh_datasets
+     from ..CLUE_CMRC.CLUE_CMRC_gen_1bd3c8 import CMRC_datasets
+     from ..CLUE_DRCD.CLUE_DRCD_gen_1bd3c8 import DRCD_datasets
+     from ..CLUE_afqmc.CLUE_afqmc_gen_901306 import afqmc_datasets
+     from ..FewCLUE_bustm.FewCLUE_bustm_gen_634f41 import bustm_datasets
+     from ..FewCLUE_chid.FewCLUE_chid_gen_0a29a2 import chid_datasets
+     from ..FewCLUE_cluewsc.FewCLUE_cluewsc_gen_c68933 import cluewsc_datasets
+     from ..FewCLUE_eprstmt.FewCLUE_eprstmt_gen_740ea0 import eprstmt_datasets
+     from ..humaneval.humaneval_gen_8e312c import humaneval_datasets
+     from ..mbpp.deprecated_mbpp_gen_1e1056 import mbpp_datasets
+     from ..lambada.lambada_gen_217e11 import lambada_datasets
+     from ..storycloze.storycloze_gen_7f656a import storycloze_datasets
+     from ..SuperGLUE_AX_b.SuperGLUE_AX_b_gen_4dfefa import AX_b_datasets
+     from ..SuperGLUE_AX_g.SuperGLUE_AX_g_gen_68aac7 import AX_g_datasets
+     from ..SuperGLUE_BoolQ.SuperGLUE_BoolQ_gen_883d50 import BoolQ_datasets
+     from ..SuperGLUE_CB.SuperGLUE_CB_gen_854c6c import CB_datasets
+     from ..SuperGLUE_COPA.SuperGLUE_COPA_gen_91ca53 import COPA_datasets
+     from ..SuperGLUE_MultiRC.SuperGLUE_MultiRC_gen_27071f import MultiRC_datasets
+     from ..SuperGLUE_RTE.SuperGLUE_RTE_gen_68aac7 import RTE_datasets
+     from ..SuperGLUE_ReCoRD.SuperGLUE_ReCoRD_gen_30dea0 import ReCoRD_datasets
+     from ..SuperGLUE_WiC.SuperGLUE_WiC_gen_d06864 import WiC_datasets
+     from ..SuperGLUE_WSC.SuperGLUE_WSC_gen_fe4bf3 import WSC_datasets
+     from ..race.race_gen_69ee4f import race_datasets
+     from ..math.math_gen_265cce import math_datasets
+     from ..gsm8k.gsm8k_gen_1d7fe4 import gsm8k_datasets
+     from ..summedits.summedits_gen_315438 import summedits_datasets
+     from ..hellaswag.hellaswag_gen_6faab5 import hellaswag_datasets
+     from ..piqa.piqa_gen_1194eb import piqa_datasets
+     from ..winogrande.deprecated_winogrande_gen_a9ede5 import winogrande_datasets
+     from ..obqa.obqa_gen_9069e4 import obqa_datasets
+     from ..nq.nq_gen_c788f6 import nq_datasets
+     from ..triviaqa.triviaqa_gen_2121ce import triviaqa_datasets
+
+ datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
opencompass/configs/datasets/collections/example.py ADDED
@@ -0,0 +1,7 @@
+ from mmengine.config import read_base
+
+ with read_base():
+     from ..piqa.piqa_gen_1194eb import piqa_datasets
+     from ..nq.nq_gen_c788f6 import nq_datasets
+
+ datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
opencompass/configs/datasets/ds1000/ds1000_compl_gen_cbc84f.py ADDED
@@ -0,0 +1,69 @@
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
+ from opencompass.openicl.icl_retriever import ZeroRetriever
+ from opencompass.openicl.icl_inferencer import GenInferencer
+ from opencompass.datasets import (DS1000Dataset, ds1000_completion_postprocess,
+                                   ds1000_matplotlib_postprocess,
+                                   DS1000Evaluator)
+
+ ds1000_reader_cfg = dict(
+     input_columns=['prompt'],
+     output_column='test_column',
+     train_split='test',
+     test_split='test')
+
+ ds1000_infer_cfg = dict(
+     prompt_template=dict(
+         type=PromptTemplate,
+         template=dict(round=[
+             dict(
+                 role='HUMAN',
+                 prompt='{prompt}',
+             ),
+         ]),
+     ),
+     retriever=dict(type=ZeroRetriever),
+     inferencer=dict(type=GenInferencer),
+ )
+
+ ds1000_eval_cfg = dict(
+     evaluator=dict(type=DS1000Evaluator),
+     pred_role='BOT',
+     pred_postprocessor=dict(type=ds1000_completion_postprocess),
+ )
+
+ # The DS-1000 dataset can be downloaded from
+ # https://github.com/HKUNLP/DS-1000/blob/main/ds1000_data.zip
+ ds1000_datasets = [
+     dict(
+         abbr=f'ds1000_{lib}',
+         type=DS1000Dataset,
+         path='./data/ds1000_data/',
+         libs=f'{lib}',
+         mode='Completion',
+         reader_cfg=ds1000_reader_cfg,
+         infer_cfg=ds1000_infer_cfg,
+         eval_cfg=ds1000_eval_cfg,
+     ) for lib in [
+         'Pandas',
+         'Numpy',
+         'Tensorflow',
+         'Scipy',
+         'Sklearn',
+         'Pytorch',
+     ]
+ ]
+ ds1000_datasets.append(
+     dict(
+         abbr='ds1000_Matplotlib',
+         type=DS1000Dataset,
+         path='./data/ds1000_data/',
+         libs='Matplotlib',
+         mode='Completion',
+         reader_cfg=ds1000_reader_cfg,
+         infer_cfg=ds1000_infer_cfg,
+         eval_cfg=dict(
+             evaluator=dict(type=DS1000Evaluator),
+             pred_role='BOT',
+             pred_postprocessor=dict(type=ds1000_matplotlib_postprocess),
+         ),
+     ))
opencompass/configs/datasets/ds1000/ds1000_compl_service_eval_gen_cbc84f.py ADDED
@@ -0,0 +1,68 @@
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
+ from opencompass.openicl.icl_retriever import ZeroRetriever
+ from opencompass.openicl.icl_inferencer import GenInferencer
+ from opencompass.datasets import DS1000Dataset, DS1000ServiceEvaluator
+
+ ds1000_reader_cfg = dict(
+     input_columns=['prompt'],
+     output_column='test_column',
+     train_split='test',
+     test_split='test')
+
+ ds1000_infer_cfg = dict(
+     prompt_template=dict(
+         type=PromptTemplate,
+         template=dict(round=[
+             dict(
+                 role='HUMAN',
+                 prompt='{prompt}',
+             ),
+         ]),
+     ),
+     retriever=dict(type=ZeroRetriever),
+     inferencer=dict(type=GenInferencer),
+ )
+
+ ds1000_eval_cfg_dict = {
+     lib: dict(
+         evaluator=dict(
+             type=DS1000ServiceEvaluator,
+             lib=lib,
+             ip_address='localhost',  # replace with your code_eval_server ip_address and port
+             port=5000
+         ),
+         pred_role='BOT')
+     for lib in [
+         'Pandas',
+         'Numpy',
+         'Tensorflow',
+         'Scipy',
+         'Sklearn',
+         'Pytorch',
+         'Matplotlib',
+     ]
+ }
+
+ # The DS-1000 dataset can be downloaded from
+ # https://github.com/HKUNLP/DS-1000/blob/main/ds1000_data.zip
+ ds1000_datasets = [
+     dict(
+         abbr=f'ds1000_{lib}',
+         type=DS1000Dataset,
+         path='./data/ds1000_data/',
+         libs=f'{lib}',
+         mode='Completion',
+         reader_cfg=ds1000_reader_cfg,
+         infer_cfg=ds1000_infer_cfg,
+         eval_cfg=ds1000_eval_cfg_dict[lib],
+     ) for lib in [
+         'Pandas',
+         'Numpy',
+         'Tensorflow',
+         'Scipy',
+         'Sklearn',
+         'Pytorch',
+         'Matplotlib',
+     ]
+ ]
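Both service-eval variants default `ip_address` to `'localhost'` and `port` to `5000`, to be pointed at your own `code_eval_server` deployment. A hedged sketch of overriding the target after importing this config; the host below is a placeholder, not a real endpoint:

```python
# Hypothetical override applied after read_base() pulls in this config.
for _cfg in ds1000_eval_cfg_dict.values():
    _cfg['evaluator']['ip_address'] = '10.0.0.5'  # your code_eval_server host
    _cfg['evaluator']['port'] = 5000
```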
opencompass/configs/datasets/ds1000/ds1000_gen_5c4bec.py ADDED
@@ -0,0 +1,84 @@
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
+ from opencompass.openicl.icl_retriever import ZeroRetriever
+ from opencompass.openicl.icl_inferencer import AgentInferencer
+ from opencompass.datasets import DS1000Dataset_Interperter, DS1000InterpreterEvaluator
+
+ ds1000_example = """
+ In the following task, you should generate code with one assertion to testify the correctness of your code.
+
+ Example:
+
+ <HUMAN>Problem:
+ How do I get the dimensions of an array? For instance, this is (2, 2):
+ a = np.array([[1,2],[3,4]])
+ <ASSISTANT>{thought} In Python, Numpy provides a method called `shape` which helps to get the dimensions of an array.
+ {action} PythonInterpreter
+ {action_input}
+ ```python
+ import numpy as np
+ def solution(x):
+     # Convert to np.ndarray
+     x = np.array(x)
+     # Getting the dimensions of the array
+     dimensions = x.shape
+     return dimensions
+ assert solution([[1,2],[3,4]]) == (2, 2)
+ ```
+ <SYSTEM>{response}True
+ <ASSISTANT> {thought} By running this code, you can get the dimensions of an array.
+ {finish}
+ ```python
+ import numpy as np
+ def solution(x):
+     # Convert to np.ndarray
+     x = np.array(x)
+     # Getting the dimensions of the array
+     dimensions = x.shape
+     return dimensions
+ ```
+ """
+
+ ds1000_reader_cfg = dict(
+     input_columns=['prompt'],
+     output_column='test_column',
+     train_split='test',
+     test_split='test',
+ )
+
+ ds1000_infer_cfg = dict(
+     prompt_template=dict(
+         type=PromptTemplate,
+         template="""{prompt}""",
+     ),
+     retriever=dict(type=ZeroRetriever),
+     inferencer=dict(type=AgentInferencer, example=ds1000_example),
+ )
+
+ ds1000_eval_cfg = dict(
+     evaluator=dict(type=DS1000InterpreterEvaluator),
+     pred_role='BOT',
+ )
+
+ # The DS-1000 dataset can be downloaded from
+ # https://github.com/HKUNLP/DS-1000/blob/main/ds1000_data.zip
+
+ # Matplotlib does not fit this setting
+ ds1000_datasets = [
+     dict(
+         abbr=f'ds1000_{lib}',
+         type=DS1000Dataset_Interperter,
+         path='./data/ds1000_data/',
+         libs=f'{lib}',
+         reader_cfg=ds1000_reader_cfg,
+         infer_cfg=ds1000_infer_cfg,
+         eval_cfg=ds1000_eval_cfg,
+     )
+     for lib in [
+         'Pandas',
+         'Numpy',
+         # 'Tensorflow',  # error using tensorflow, skipped temporarily
+         'Scipy',
+         'Sklearn',
+         'Pytorch',
+     ]
+ ]
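The in-context example above instructs the model to emit code that carries its own assertion; when the interpreter executes the snippet and the assertion passes (the `{response}True` turn), the answer is accepted. The pattern, lifted from the example prompt itself, runs standalone:

```python
import numpy as np

def solution(x):
    # Convert to np.ndarray and return its dimensions
    x = np.array(x)
    return x.shape

# The trailing assertion is the self-check the interpreter evaluates.
assert solution([[1, 2], [3, 4]]) == (2, 2)
```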
opencompass/configs/datasets/ds1000/ds1000_gen_cbc84f.py ADDED
@@ -0,0 +1,67 @@
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
+ from opencompass.openicl.icl_retriever import ZeroRetriever
+ from opencompass.openicl.icl_inferencer import GenInferencer
+ from opencompass.datasets import (DS1000Dataset, ds1000_postprocess,
+                                   ds1000_matplotlib_postprocess,
+                                   DS1000Evaluator)
+
+ ds1000_reader_cfg = dict(
+     input_columns=['prompt'],
+     output_column='test_column',
+     train_split='test',
+     test_split='test')
+
+ ds1000_infer_cfg = dict(
+     prompt_template=dict(
+         type=PromptTemplate,
+         template=dict(round=[
+             dict(
+                 role='HUMAN',
+                 prompt='{prompt}',
+             ),
+         ]),
+     ),
+     retriever=dict(type=ZeroRetriever),
+     inferencer=dict(type=GenInferencer),
+ )
+
+ ds1000_eval_cfg = dict(
+     evaluator=dict(type=DS1000Evaluator),
+     pred_role='BOT',
+     pred_postprocessor=dict(type=ds1000_postprocess),
+ )
+
+ # The DS-1000 dataset can be downloaded from
+ # https://github.com/HKUNLP/DS-1000/blob/main/ds1000_data.zip
+ ds1000_datasets = [
+     dict(
+         abbr=f'ds1000_{lib}',
+         type=DS1000Dataset,
+         path='./data/ds1000_data/',
+         libs=f'{lib}',
+         reader_cfg=ds1000_reader_cfg,
+         infer_cfg=ds1000_infer_cfg,
+         eval_cfg=ds1000_eval_cfg,
+     ) for lib in [
+         'Pandas',
+         'Numpy',
+         'Tensorflow',
+         'Scipy',
+         'Sklearn',
+         'Pytorch',
+     ]
+ ]
+ ds1000_datasets.append(
+     dict(
+         abbr='ds1000_Matplotlib',
+         type=DS1000Dataset,
+         path='./data/ds1000_data/',
+         libs='Matplotlib',
+         reader_cfg=ds1000_reader_cfg,
+         infer_cfg=ds1000_infer_cfg,
+         eval_cfg=dict(
+             evaluator=dict(type=DS1000Evaluator),
+             pred_role='BOT',
+             pred_postprocessor=dict(type=ds1000_matplotlib_postprocess),
+         ),
+     ))
opencompass/configs/datasets/ds1000/ds1000_service_eval_gen_cbc84f.py ADDED
@@ -0,0 +1,67 @@
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
+ from opencompass.openicl.icl_retriever import ZeroRetriever
+ from opencompass.openicl.icl_inferencer import GenInferencer
+ from opencompass.datasets import DS1000Dataset, DS1000ServiceEvaluator
+
+ ds1000_reader_cfg = dict(
+     input_columns=['prompt'],
+     output_column='test_column',
+     train_split='test',
+     test_split='test')
+
+ ds1000_infer_cfg = dict(
+     prompt_template=dict(
+         type=PromptTemplate,
+         template=dict(round=[
+             dict(
+                 role='HUMAN',
+                 prompt='{prompt}',
+             ),
+         ]),
+     ),
+     retriever=dict(type=ZeroRetriever),
+     inferencer=dict(type=GenInferencer),
+ )
+
+ ds1000_eval_cfg_dict = {
+     lib: dict(
+         evaluator=dict(
+             type=DS1000ServiceEvaluator,
+             lib=lib,
+             ip_address='localhost',  # replace with your code_eval_server ip_address and port
+             port=5000
+         ),
+         pred_role='BOT')
+     for lib in [
+         'Pandas',
+         'Numpy',
+         'Tensorflow',
+         'Scipy',
+         'Sklearn',
+         'Pytorch',
+         'Matplotlib',
+     ]
+ }
+
+ # The DS-1000 dataset can be downloaded from
+ # https://github.com/HKUNLP/DS-1000/blob/main/ds1000_data.zip
+ ds1000_datasets = [
+     dict(
+         abbr=f'ds1000_{lib}',
+         type=DS1000Dataset,
+         path='./data/ds1000_data/',
+         libs=f'{lib}',
+         reader_cfg=ds1000_reader_cfg,
+         infer_cfg=ds1000_infer_cfg,
+         eval_cfg=ds1000_eval_cfg_dict[lib],
+     ) for lib in [
+         'Pandas',
+         'Numpy',
+         'Tensorflow',
+         'Scipy',
+         'Sklearn',
+         'Pytorch',
+         'Matplotlib',
+     ]
+ ]
opencompass/configs/datasets/gaokao_math/README.md ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # GaoKao MATH Answer Evaluation Dataset
2
+ A dataset for testing the performance of the model in the GaoKao MATH Answer Extraction task.
3
+ Now support the following format of GAOKAO math questions:
4
+ 1. '单选题':Single choice question
5
+ 2. '多选题':Multiple choice question
6
+ 3. '填空题':Fill in the blank question, can be multiple blanks
7
+ 4. '解答题':Answer question, can be multiple answers
8
+
9
+ Sample data:
10
+ ```json
11
+ [
12
+ {
13
+ "id": "3b270bc4-570a-4d77-b122-a2fc372f7d6a",
14
+ "question": "过椭圆${x^2\\over {16}} +{ y^2 \\over {4}}=1$ %内一点$M(2,1)$ %引一条弦,使该弦被点$M$ %平分,则这条弦所在直线的方程为( ).\nA. $x+2y-4=0$ %\nB. $x-2y-4=0$ %\nC. $x+2y+4=0$ %\nD. $x-2y+4=0$ %\n\n",
15
+ "response": "本题主要考查直线与圆锥曲线.设所求直线与椭圆的一个交点为$A(x,y)$ %,由于中点$M(2,1)$ %,所以另一个交点$B$ %为$(4-x,2-y)$ %.因为$A$ %,$B$ %两点都在椭圆上,所以$x^2+4y^2=16$ %,$(4-x)^2+4(2-y)^2=16$ %,两式相减,整理可得$x+2y-4=0$ %.由于过$A$ %,$B$ %两点的直线只有一条,所以这条弦所在直线的方程为$x+2y-4=0$ %.故本题正确答案为A.\n答案是:A",
16
+ "extract_answer": "A",
17
+ "question_type": "单选题"
18
+ },
19
+ {
20
+ "id": "d60e42d7-30ee-44f9-a94d-aff6a8127750",
21
+ "question": "若函数$f(x)$ 具有下列性质:1.定义域为$(-1,1)$ ;2.对于任意的$x,y\\in(-1,1)$ ,都有$f(x)+f(y)=f\\left({\\dfrac{x+y}{1+xy}}\\right)$ ;3.当$-1< x< 0$ 时,$f(x)>0$ ,则称函数$f(x)$ 为$δ$ 的函数$.$ 若函数$f(x)$ 为$δ$ 的函数,则以下结论正确的是$(\\quad)$\nA. $\nB. x)$ 为奇函数\nC. $\nD. x)$ 为偶函数\nE. $\nF. x)$ 为单调递减函数\nG. $\nH. x)$ 为单调递增函数\n\n",
22
+ "response": "函数$f(x)$ 为$δ$ 的函数,令$x=y=0$ ,则$f(0)+f(0)=f(0)$ ,即$f(0)=0$ ,令$y=-x$ ,则$f(x)+f(-x)=f\\left(\\dfrac{x-x}{1-{x}^{2}}\\right)=f(0)=0$ ,则$f(-x)=-f(x)$ ,即函数$f(x)$ 是奇函数,设$-1< x< y< 1$ ,则$f(x)-f(y)=f(x)+f(-y)=f\\left(\\dfrac{x-y}{1-xy}\\right)$ ,$∵-1< x< y< 1$ ,$∴-1< \\dfrac{x-y}{1-xy}< 0$ ,则$f\\left(\\dfrac{x-y}{1-xy}\\right)>0$ ,即$f(x)-f(y)>0$ ,则$f(x)>f(y)$ ,即$f(x)$ 在$(-1,1)$ 上是减函数.故选$AC.$ 本题考查函数的奇偶性和单调性的判断,注意运用定义法,考查运算能力和推理能力,属于中档题.可令$x=y=0$ ,求得$f(0)=0$ ,再令$y=-x$ 可得$f(-x)=-f(x)$ ,可得$f(x)$ 的奇偶性;再令$-1< x< y< 1$ ,运用单调性的定义,结合其偶性的定义可得其单调性.\n答案是:A; C",
23
+ "extract_answer": "A, C",
24
+ "question_type": "多选题"
25
+ },
26
+ {
27
+ "id": "31b3f702-e60c-4a20-9a40-73bd72b92d1e",
28
+ "question": "请完成以下题目(1)曲线$$y=-5\\text{e}^{x}+3$$在点$$(0,-2)$$处的切线方程为___.(2)若曲线$$f(x)=x \\sin x+1$$在$$x=\\dfrac{ \\pi }{2}$$处的切线与直线$$ax+2y+1=0$$相互垂直,则实数$$a=$$___.\n\n",
29
+ "response": "(1)由$$y=-5\\text{e}^{x}+3$$,得$$y'=-5\\text{e}^{x}$$,所以切线的斜率$$k=y'|_{x=0}=-5$$,所以切线方程为$$y+2=-5(x-0)$$,即$$5x+y+2=0$$.(2)因为$$f'(x)= \\sin x+x \\cos x$$,所以$$f'\\left(\\dfrac{ \\pi }{2}\\right)= \\sin \\dfrac{ \\pi }{2}+\\dfrac{ \\pi }{2}\\cdot \\cos \\dfrac{ \\pi }{2}=1$$.又直线$$ax+2y+1=0$$的斜率为$$-\\dfrac{a}{2}$$,所以根据题意得$$1\\times \\left(-\\dfrac{a}{2}\\right)=-1$$,解得$$a=2$$.\n答案是:(1)$$5x+y+2=0$$ (2)$$2$$",
30
+ "extract_answer": "['(1)$$5x+y+2=0$$ (2)$$2$$']",
31
+ "question_type": "填空题"
32
+ },
33
+ {
34
+ "id": "16878941-1772-4290-bc61-00b193d5cf70",
35
+ "question": "已知函数$f\\left( x \\right)=\\left| 2x-1 \\right|$.(1)若不等式$f\\left( x+\\frac{1}{2} \\right)\\ge 2m+1\\left( m > 0 \\right)$的解集为$\\left( -\\infty ,-2 \\right]\\bigcup \\left[ 2,+\\infty \\right)$,求实数$m$的值;(2)若不等式$f\\left( x \\right)\\le {{2}^{y}}+\\frac{a}{{{2}^{y}}}+\\left| 2x+3 \\right|$对任意的实数$x,y\\in R$恒成立,求实数$a$的最小值.\n\n",
36
+ "response": "(1)直接写出不等式,解含有绝对值的函数不等式即可;(2)这是恒成立求参的问题,根据绝对值三角不等式得到左侧函数的最值,再结合均值不等式得最值.(1)由条件得$\\left| 2x \\right|\\le 2m+1$得$-m-\\frac{1}{2}\\le x\\le m+\\frac{1}{2}$,所以$m=\\frac{3}{2}$.(2)原不等式等价于$\\left| 2x-1 \\right|-\\left| 2x+3 \\right|\\le {{2}^{y}}+\\frac{a}{{{2}^{y}}}$,而$\\left| 2x-1 \\right|-\\left| 2x+3 \\right|\\le \\left| \\left( 2x-1 \\right)-\\left( 2x+3 \\right) \\right|=4$,所以${{2}^{y}}+\\frac{a}{{{2}^{y}}}\\ge 4$,则$a\\ge {{\\left[ {{2}^{y}}\\left( 4-{{2}^{y}} \\right) \\right]}_{\\text{max}}}=4$,当且仅当$y=1$时取得.\n答案是:(1) $m=\\frac{3}{2}$;(2) 最小值为$a=4$.",
37
+ "extract_answer": [
38
+ "(1) $m=\\frac{3}{2}$;(2) 最小值为$a=4$."
39
+ ],
40
+ "question_type": "解答题"
41
+ }
42
+ ]
43
+ ```
44
+ ## How to use
45
+
46
+ ### 1. Prepare the dataset
47
+ ```bash
48
+ cd opencompass
49
+ cp -rf /cpfs01/shared/public/liuhongwei/data/gaokao_math_dataset/gaokao_math ./data
50
+ ```
51
+ 📢: If you want to evaluate your own GAOKAO math data, replace `test_v2.jsonl` with your own file, following the format above.
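+
+ Before plugging in your own file, it can help to sanity-check that every record carries the fields shown in the sample above. The snippet below is a minimal sketch (the path and the one-JSON-object-per-line assumption mirror `test_v2.jsonl`; adjust both to your data):
+
+ ```python
+ import json
+
+ REQUIRED_KEYS = {'id', 'question', 'response', 'extract_answer', 'question_type'}
+ VALID_TYPES = {'单选题', '多选题', '填空题', '解答题'}
+
+ with open('data/gaokao_math/test_v2.jsonl', encoding='utf-8') as f:
+     records = [json.loads(line) for line in f]  # one JSON object per line
+
+ for i, rec in enumerate(records):
+     missing = REQUIRED_KEYS - rec.keys()
+     assert not missing, f'record {i} is missing fields: {missing}'
+     assert rec['question_type'] in VALID_TYPES, f'record {i}: unknown question_type'
+ ```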
52
+
53
+ ### 2. Set the evaluation model
54
+
55
+ Open `opencompass.datasets.gaokao_math.gaokao_math_gen_9b076f` and set the model name and API URL for evaluation; multiple URLs are supported for acceleration.
56
+
57
+ ```python
58
+ ...
59
+
60
+ gaokao_math_eval_cfg = dict(
61
+ evaluator=dict(type=GaoKaoMATHEvaluator, model_name='EVALUATE_MODEL_NAME', url=['http://0.0.0.0:23333/v1', 'http://...']))
62
+
63
+ ...
64
+
65
+ ```
66
+ We recommend the `Qwen2.5-72B-Instruct` model for evaluation.
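+
+ The evaluator expects an OpenAI-compatible endpoint at each URL. Before launching a full run, you can check that an endpoint is reachable with a quick probe; this is a sketch assuming the `openai` Python client and a server that accepts a dummy API key:
+
+ ```python
+ from openai import OpenAI
+
+ # Point the client at one of the URLs configured above.
+ client = OpenAI(base_url='http://0.0.0.0:23333/v1', api_key='EMPTY')
+ print([m.id for m in client.models.list().data])  # should list the served evaluation model
+ ```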
67
+
68
+
69
+ ### 3. Set the extractor model
70
+
71
+ ```python
72
+ from mmengine.config import read_base
73
+ from opencompass.models import HuggingFacewithChatTemplate
74
+
75
+
76
+ with read_base():
77
+ from opencompass.datasets.gaokao_math.gaokao_math_gen_9b076f import gaokao_math_datasets
78
+
79
+
80
+ trained_qwen2_1_5b_model = [ # trained extractor model
81
+ dict(
82
+ type=HuggingFacewithChatTemplate,
83
+ abbr='gaokao_math_extractor_1_5b_v02',
84
+ path='/cpfs01/shared/public/liuhongwei/models/gaokao_math_trained/gaokao_math_extractor_1_5b_v02',
85
+ max_out_len=1024,
86
+ batch_size=8,
87
+ run_cfg=dict(num_gpus=1),
88
+ )
89
+ ]
90
+
91
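+ # Gather every *_datasets and *_model variable defined above into flat lists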
+ datasets = sum([v for k, v in locals().items() if k.endswith("_datasets")], [])
92
+ models = sum([v for k, v in locals().items() if k.endswith("_model")], [])
93
+
94
+ ...
95
+ ```
96
+
97
+ ### 4. Run the evaluation
98
+
99
+ ```bash
100
+ python run.py eval.py --dump-eval-details  # evaluate and dump evaluation details to the `results` folder
101
+ ```
102
+
103
+
104
+ ### 5. Evaluation results
105
+
106
+ | Evaluator / Extractor | Qwen2.5-72B-Instruct | gaokao_math_extractor_1.5b_v0.2 |
107
+ |-----------------------|-----------------------|----------------------------------|
108
+ | Qwen2.5-72B-Instruct (ACC) | 95.85 | 95.2 |
opencompass/configs/datasets/gaokao_math/gaokao_math_gen_f5fd28.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
2
+ from opencompass.openicl.icl_retriever import ZeroRetriever
3
+ from opencompass.openicl.icl_inferencer import GenInferencer
4
+ from opencompass.datasets import GaoKaoMATHDataset, GaoKaoMATHEvaluator
5
+
6
+
7
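+ # Judge/extractor prompt (Chinese): instructs the model to return only the final key answer, with a required output format per question type.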
+ MATH_CN_PROMPT="""
8
+ 你是一个数学阅卷专家,任务是从给定的回答句子中提取精确的关键答案。你必须只提供提取的关键答案,不包括任何额外的文字。
9
+
10
+ 我将为你提供一个问题、回答句子和问题类型。回答句子是对所提供问题的回应。利用提供的信息,你必须准确而精确地确定并从回答句子中提取预期的关键答案。请不要对问题发表主观看法。
11
+
12
+ 对于单选题,答案应该是选项字母,例如 "A";
13
+ 对于多选题,答案应该是一个选项字母的列表,例如 ["A"] 或 ["A", "B", "C"];
14
+ 对于填空题,答案应该是一个填入空白处的答案列表,列表的数量应该与问题中的空白数量相同,例如 ["$$\\frac{{1}}{{2}}$$"] 或 ["$$\\frac{{1}}{{2}}$$", "2"]。
15
+ 对于问答题,类似填空题,为每个小问抽出相应答案,例如 ["$$\\frac{{1}}{{2}}$$"] 或 ["$$\\frac{{1}}{{2}}$$", "2"]。
16
+
17
+ 如果回答句子提供了多个不同的答案,请仔细判断后面提供的答案是否是对前面答案的修正或修改。如果是这样,提取这个修正或修改后的答案作为最终答案。相反,如果回答句子在多个答案之间波动而没有明确的最终答案,你应该输出 [No valid answer]。
18
+
19
+ 问题类型: {question_type}
20
+ 原始问题: {question}
21
+ 回答: {response}
22
+ 提取的关键答案:
23
+ """
24
+
25
+ gaokao_math_reader_cfg = dict(input_columns=['question', 'response', 'question_type'], output_column='extract_answer')
26
+
27
+
28
+ gaokao_math_infer_cfg = dict(
29
+ prompt_template=dict(
30
+ type=PromptTemplate,
31
+ template=dict(round=[
32
+ dict(role='HUMAN', prompt=MATH_CN_PROMPT),
33
+ ])),
34
+ retriever=dict(type=ZeroRetriever),
35
+ inferencer=dict(type=GenInferencer, max_out_len=512))
36
+
37
+ gaokao_math_eval_cfg = dict(
38
+ evaluator=dict(type=GaoKaoMATHEvaluator, model_name='Qwen/Qwen2.5-72B-Instruct', url=['http://22.8.73.119:23333/v1', 'http://22.8.4.97:23333/v1', 'http://22.8.22.254:23333/v1', 'http://22.8.17.14:23333/v1']))
39
+
40
+ gaokao_math_datasets = [
41
+ dict(
42
+ type=GaoKaoMATHDataset,
43
+ abbr='GaoKaoMATH',
44
+ path='./data/gaokao_math/test_2k.json',
45
+ reader_cfg=gaokao_math_reader_cfg,
46
+ infer_cfg=gaokao_math_infer_cfg,
47
+ eval_cfg=gaokao_math_eval_cfg)
48
+ ]
opencompass/configs/datasets/humaneval_cn/humaneval_cn_gen.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from mmengine.config import read_base
2
+
3
+ with read_base():
4
+ from .humaneval_cn_gen_6313aa import humaneval_cn_datasets # noqa: F401, F403
opencompass/configs/datasets/humaneval_cn/humaneval_cn_gen_6313aa.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
2
+ from opencompass.openicl.icl_retriever import ZeroRetriever
3
+ from opencompass.openicl.icl_inferencer import GenInferencer
4
+ from opencompass.datasets import HumanevalDataset, HumanEvalEvaluator, humaneval_postprocess_v2
5
+
6
+ humaneval_reader_cfg = dict(
7
+ input_columns=['prompt'], output_column='task_id', train_split='test')
8
+
9
+ # TODO: allow empty output-column
10
+ humaneval_infer_cfg = dict(
11
+ prompt_template=dict(
12
+ type=PromptTemplate,
13
+ template=dict(round=[
14
+ dict(
15
+ role='HUMAN',
16
+ prompt='完成以下Python代码任务:\n{prompt}'),
17
+ ])),
18
+ retriever=dict(type=ZeroRetriever),
19
+ inferencer=dict(type=GenInferencer, max_out_len=512))
20
+
21
+ humaneval_eval_cfg = dict(
22
+ evaluator=dict(type=HumanEvalEvaluator),
23
+ pred_role='BOT',
24
+ k=[1, 10, 100],  # pass@k values; this parameter is specific to HumanEval
25
+ pred_postprocessor=dict(type=humaneval_postprocess_v2),
26
+ )
27
+
28
+ humaneval_cn_datasets = [
29
+ dict(
30
+ abbr='openai_humaneval_cn',
31
+ type=HumanevalDataset,
32
+ path='opencompass/humaneval_cn',
33
+ reader_cfg=humaneval_reader_cfg,
34
+ infer_cfg=humaneval_infer_cfg,
35
+ eval_cfg=humaneval_eval_cfg)
36
+
37
+ ]
opencompass/configs/datasets/humaneval_cn/humaneval_cn_passk_gen_6313aa.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
2
+ from opencompass.openicl.icl_retriever import ZeroRetriever
3
+ from opencompass.openicl.icl_inferencer import GenInferencer
4
+ from opencompass.datasets import HumanevalDataset, HumanEvalEvaluator, humaneval_postprocess_v2
5
+
6
+ humaneval_reader_cfg = dict(
7
+ input_columns=['prompt'], output_column='task_id', train_split='test')
8
+
9
+ # TODO: allow empty output-column
10
+ humaneval_infer_cfg = dict(
11
+ prompt_template=dict(
12
+ type=PromptTemplate,
13
+ template=dict(round=[
14
+ dict(
15
+ role='HUMAN',
16
+ prompt='完成以下Python代码任务:\n{prompt}'),
17
+ ])),
18
+ retriever=dict(type=ZeroRetriever),
19
+ inferencer=dict(type=GenInferencer, max_out_len=512))
20
+
21
+ humaneval_eval_cfg = dict(
22
+ evaluator=dict(type=HumanEvalEvaluator),
23
+ pred_role='BOT',
24
+ k=[1, 10, 100],  # pass@k values; this parameter is specific to HumanEval
25
+ pred_postprocessor=dict(type=humaneval_postprocess_v2),
26
+ )
27
+
28
+ humaneval_cn_datasets = [
29
+ dict(
30
+ abbr='openai_humaneval_cn_passk',
31
+ type=HumanevalDataset,
32
+ path='opencompass/humaneval_cn',
33
+ reader_cfg=humaneval_reader_cfg,
34
+ infer_cfg=humaneval_infer_cfg,
35
+ eval_cfg=humaneval_eval_cfg)
36
+
37
+ ]
opencompass/configs/datasets/humaneval_cn/humaneval_cn_repeat10_gen_6313aa.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
2
+ from opencompass.openicl.icl_retriever import ZeroRetriever
3
+ from opencompass.openicl.icl_inferencer import GenInferencer
4
+ from opencompass.datasets import HumanevalDataset, HumanEvalEvaluator, humaneval_postprocess_v2
5
+
6
+ humaneval_reader_cfg = dict(
7
+ input_columns=['prompt'], output_column='task_id', train_split='test')
8
+
9
+ # TODO: allow empty output-column
10
+ humaneval_infer_cfg = dict(
11
+ prompt_template=dict(
12
+ type=PromptTemplate,
13
+ template=dict(round=[
14
+ dict(
15
+ role='HUMAN',
16
+ prompt='完成以下Python代码任务:\n{prompt}'),
17
+ ])),
18
+ retriever=dict(type=ZeroRetriever),
19
+ inferencer=dict(type=GenInferencer, max_out_len=512))
20
+
21
+ humaneval_eval_cfg = dict(
22
+ evaluator=dict(type=HumanEvalEvaluator),
23
+ pred_role='BOT',
24
+ k=[1, 10, 100], # the parameter only for humaneval
25
+ pred_postprocessor=dict(type=humaneval_postprocess_v2),
26
+ )
27
+
28
+ humaneval_cn_datasets = [
29
+ dict(
30
+ abbr='openai_humaneval_cn_repeat10',
31
+ type=HumanevalDataset,
32
+ path='opencompass/humaneval_cn',
33
+ num_repeats=10,
34
+ reader_cfg=humaneval_reader_cfg,
35
+ infer_cfg=humaneval_infer_cfg,
36
+ eval_cfg=humaneval_eval_cfg)
37
+
38
+ ]
opencompass/configs/datasets/jigsawmultilingual/jigsawmultilingual_clp.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from mmengine.config import read_base
2
+
3
+ with read_base():
4
+ from .jigsawmultilingual_clp_fe50d8 import jigsawmultilingual_datasets # noqa: F401, F403
opencompass/configs/datasets/jigsawmultilingual/jigsawmultilingual_clp_1af0ae.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
2
+ from opencompass.openicl.icl_retriever import ZeroRetriever
3
+ from opencompass.openicl.icl_inferencer import CLPInferencer
4
+ from opencompass.openicl.icl_evaluator import AUCROCEvaluator
5
+ from opencompass.datasets import JigsawMultilingualDataset
6
+
7
+ jigsawmultilingual_reader_cfg = dict(
8
+ input_columns=['text'],
9
+ output_column='label',
10
+ train_split='test',
11
+ test_split='test')
12
+
13
+ # The Jigsaw multilingual toxic dataset is not available on Hugging Face.
14
+ # Please download the test dataset files `test.csv` and `test_labels.csv` from
15
+ # the link below and move them to the data/ folder to perform evaluation.
16
+ # https://www.kaggle.com/competitions/jigsaw-multilingual-toxic-comment-classification/data
17
+ jigsawmultilingual_infer_cfg = dict(
18
+ prompt_template=dict(
19
+ type=PromptTemplate,
20
+ template='Text: {text}\nQuestion: '
21
+ 'Does the above text contain rude, hateful, aggressive, disrespectful '
22
+ 'or unreasonable language?\nAnswer:'),
23
+ retriever=dict(type=ZeroRetriever),
24
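+ # CLPInferencer scores candidate answers by conditional log-probability rather than
+ # free-form generation, which pairs with the AUC-ROC evaluator below.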
+ inferencer=dict(type=CLPInferencer))
25
+
26
+ jigsawmultilingual_eval_cfg = dict(evaluator=dict(type=AUCROCEvaluator), )
27
+
28
+ lang = ['es', 'fr', 'it', 'pt', 'ru', 'tr']
29
+ jigsawmultilingual_datasets = []
30
+
31
+ for _l in lang:
32
+ jigsawmultilingual_datasets.append(
33
+ dict(
34
+ abbr=f'jigsaw_multilingual_{_l}',
35
+ type=JigsawMultilingualDataset,
36
+ path='data/jigsawmultilingual/test.csv',
37
+ label='data/jigsawmultilingual/test_labels.csv',
38
+ lang=_l,
39
+ reader_cfg=jigsawmultilingual_reader_cfg,
40
+ infer_cfg=jigsawmultilingual_infer_cfg,
41
+ eval_cfg=jigsawmultilingual_eval_cfg))
42
+
43
+ del lang, _l
opencompass/configs/datasets/jigsawmultilingual/jigsawmultilingual_clp_fe50d8.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
2
+ from opencompass.openicl.icl_retriever import ZeroRetriever
3
+ from opencompass.openicl.icl_inferencer import CLPInferencer
4
+ from opencompass.openicl.icl_evaluator import AUCROCEvaluator
5
+ from opencompass.datasets import JigsawMultilingualDataset
6
+
7
+ jigsawmultilingual_reader_cfg = dict(
8
+ input_columns=['text'],
9
+ output_column='label',
10
+ train_split='test',
11
+ test_split='test')
12
+
13
+ # The Jigsaw multilingual toxic dataset is not available on Hugging Face.
14
+ # Please download the test dataset files `test.csv` and `test_labels.csv` from
15
+ # the link below and move them to the data/ folder to perform evaluation.
16
+ # https://www.kaggle.com/competitions/jigsaw-multilingual-toxic-comment-classification/data
17
+ jigsawmultilingual_infer_cfg = dict(
18
+ prompt_template=dict(
19
+ type=PromptTemplate,
20
+ template=dict(round=[
21
+ dict(
22
+ role='HUMAN',
23
+ prompt='Text: {text}\nQuestion: Does the above text contain '
24
+ 'rude, hateful, aggressive, disrespectful or unreasonable '
25
+ 'language?\nAnswer:')
26
+ ])),
27
+ retriever=dict(type=ZeroRetriever),
28
+ inferencer=dict(type=CLPInferencer))
29
+
30
+ jigsawmultilingual_eval_cfg = dict(evaluator=dict(type=AUCROCEvaluator), )
31
+
32
+ lang = ['es', 'fr', 'it', 'pt', 'ru', 'tr']
33
+ jigsawmultilingual_datasets = []
34
+
35
+ for _l in lang:
36
+ jigsawmultilingual_datasets.append(
37
+ dict(
38
+ abbr=f'jigsaw_multilingual_{_l}',
39
+ type=JigsawMultilingualDataset,
40
+ path='data/jigsawmultilingual/test.csv',
41
+ label='data/jigsawmultilingual/test_labels.csv',
42
+ lang=_l,
43
+ reader_cfg=jigsawmultilingual_reader_cfg,
44
+ infer_cfg=jigsawmultilingual_infer_cfg,
45
+ eval_cfg=jigsawmultilingual_eval_cfg))
46
+
47
+ del lang, _l
opencompass/configs/datasets/livecodebench/README.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # LiveCodeBench
2
+
3
+ ## Dataset
4
+
5
+ LiveCodeBench provides a holistic and contamination-free evaluation of the coding capabilities of LLMs. In particular, it continuously collects new problems over time from contests on three competition platforms -- LeetCode, AtCoder, and CodeForces. Beyond code generation, LiveCodeBench also covers a broader range of code-related capabilities, such as self-repair, code execution, and test output prediction. Currently, LiveCodeBench hosts four hundred high-quality coding problems published between May 2023 and March 2024.
6
+
7
+ - Original project: https://livecodebench.github.io/leaderboard.html
8
+
9
+ ## Setting
10
+
11
+ | Model Type | Code Generation | Test Output Prediction | Code Execution |
12
+ |------------|--------|--------|--------|
13
+ | Base Model | ❌ | ❌ | ❌ |
14
+ | Chat Model | ✅ | ✅ | ✅ |
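+
+ For chat models, all three subsets can be pulled into an evaluation config with a single import. A minimal sketch (the import path assumes this repo's `opencompass/configs` layout; adjust it to your checkout):
+
+ ```python
+ from mmengine.config import read_base
+
+ with read_base():
+     # livecodebench_gen re-exports LCB_datasets: code generation, code execution, test output prediction
+     from opencompass.configs.datasets.livecodebench.livecodebench_gen import LCB_datasets
+
+ datasets = LCB_datasets
+ ```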
15
+
16
+
17
+
18
+ ## Baseline Performance
19
+
20
+
21
+ | Model Type | Code Generation(pass@1) | Test Output Prediction(pass@1) | Code Execution(pass@1) |
22
+ |------------|--------|--------|--------|
23
+ | Qwen2.5-7B-Instruct(HF) | 39.25 | 48.64 | 41.96 |
24
+ | Meta-Llama-3.1-8B-Instruct(HF) | 20.25 | 24.66 | 17.12 |
25
+
26
+
27
+ ## Citation
28
+
29
+ ```bibtex
30
+ @article{jain2024livecodebench,
31
+ author = {Naman Jain, King Han, Alex Gu, Wen-Ding Li, Fanjia Yan, Tianjun Zhang, Sida Wang, Armando Solar-Lezama, Koushik Sen, Ion Stoica},
32
+ title = {LiveCodeBench: Holistic and Contamination Free Evaluation of Large Language Models for Code},
33
+ year = {2024},
34
+ journal = {arXiv preprint},
35
+ }
36
+ @misc{2023opencompass,
37
+ title={OpenCompass: A Universal Evaluation Platform for Foundation Models},
38
+ author={OpenCompass Contributors},
39
+ howpublished = {\url{https://github.com/open-compass/opencompass}},
40
+ year={2023}
41
+ }
42
+ ```
opencompass/configs/datasets/livecodebench/livecodebench_gen.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from mmengine.config import read_base
2
+
3
+ with read_base():
4
+ from .livecodebench_gen_6966bc import LCB_datasets # noqa: F401, F403
opencompass/configs/datasets/livecodebench/livecodebench_gen_6966bc.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
2
+ from opencompass.openicl.icl_retriever import ZeroRetriever
3
+ from opencompass.openicl.icl_inferencer import GenInferencer
4
+ from opencompass.datasets import (
5
+ LCBCodeGenerationDataset,
6
+ LCBCodeExecutionDataset,
7
+ LCBTestOutputPredictionDataset,
8
+ LCBCodeGenerationEvaluator,
9
+ LCBCodeExecutionEvaluator,
10
+ LCBTestOutputEvaluator
11
+ )
12
+ from opencompass.datasets.livecodebench import TestOutputPromptConstants
13
+
14
+
15
+ lcb_code_generation_reader_cfg = dict(
16
+ input_columns=[
17
+ 'question_content',
18
+ 'format_prompt',
19
+ ],
20
+ # output_column='evaluation_sample',
21
+ output_column='question_id',
22
+ )
23
+
24
+ SYSTEM_MESSAGE_GENERIC = 'You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.'
25
+
26
+ prompt_template = '### Question:\n{question_content}\n\n{format_prompt}' + \
27
+ '### Answer: (use the provided format with backticks)\n\n'
28
+
29
+
30
+ # Code Generation Tasks
31
+ lcb_code_generation_infer_cfg = dict(
32
+ prompt_template=dict(
33
+ type=PromptTemplate,
34
+ template=dict(
35
+ round=[
36
+ dict(
37
+ role='HUMAN',
38
+ prompt=prompt_template
39
+ )
40
+ ]
41
+ )
42
+ ),
43
+ retriever=dict(type=ZeroRetriever),
44
+ inferencer=dict(type=GenInferencer, max_out_len=1024)
45
+ )
46
+
47
+ lcb_code_generation_eval_cfg = dict(
48
+ evaluator=dict(
49
+ type=LCBCodeGenerationEvaluator,
50
+ num_process_evaluate=4,
51
+ timeout=6,
52
+ ),
53
+ pred_role='BOT',
54
+ )
55
+
56
+ LCBCodeGeneration_dataset = dict(
57
+ type=LCBCodeGenerationDataset,
58
+ abbr='lcb_code_generation',
59
+ path='opencompass/code_generation_lite',
60
+ reader_cfg=lcb_code_generation_reader_cfg,
61
+ infer_cfg=lcb_code_generation_infer_cfg,
62
+ eval_cfg=lcb_code_generation_eval_cfg
63
+ )
64
+
65
+ # Code Execution Dataset
66
+ lcb_code_execution_reader_cfg = dict(
67
+ input_columns=[
68
+ 'prompt',
69
+ ],
70
+ output_column='evaluation_sample',
71
+ )
72
+
73
+ lcb_code_execution_infer_cfg = dict(
74
+ prompt_template=dict(
75
+ type=PromptTemplate,
76
+ template=dict(
77
+ begin=[
78
+ dict(
79
+ role='SYSTEM',
80
+ fallback_role='HUMAN',
81
+ prompt='You are an expert at Python programming, code execution, test case generation, and fuzzing.'
82
+ ),
83
+ ],
84
+ round=[
85
+ dict(
86
+ role='HUMAN',
87
+ prompt='{prompt}'
88
+ )
89
+ ]
90
+ )
91
+ ),
92
+ retriever=dict(type=ZeroRetriever),
93
+ inferencer=dict(type=GenInferencer, max_out_len=1024)
94
+ )
95
+
96
+ lcb_code_execution_eval_cfg = dict(
97
+ evaluator=dict(
98
+ type=LCBCodeExecutionEvaluator,
99
+ ),
100
+ pred_role='BOT',
101
+ )
102
+
103
+ LCBCodeExecution_dataset = dict(
104
+ type=LCBCodeExecutionDataset,
105
+ abbr='lcb_code_execution',
106
+ path='opencompass/execution-v2',
107
+ reader_cfg=lcb_code_execution_reader_cfg,
108
+ infer_cfg=lcb_code_execution_infer_cfg,
109
+ eval_cfg=lcb_code_execution_eval_cfg,
110
+ )
111
+
112
+ # Test Output Prediction Dataset
113
+ lcb_test_output_reader_cfg = dict(
114
+ input_columns=[
115
+ 'prompt',
116
+ ],
117
+ output_column='evaluation_sample',
118
+ )
119
+
120
+ system_prompt = 'You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.'
121
+
122
+ lcb_test_output_infer_cfg = dict(
123
+ prompt_template=dict(
124
+ type=PromptTemplate,
125
+ template=dict(
126
+ # begin=[
127
+ # dict(
128
+ # role='SYSTEM',
129
+ # prompt=system_prompt
130
+ # ),
131
+ # ],
132
+ round=[
133
+ dict(
134
+ role='HUMAN',
135
+ prompt='{prompt}'
136
+ )
137
+ ]
138
+ )
139
+ ),
140
+ retriever=dict(type=ZeroRetriever),
141
+ inferencer=dict(type=GenInferencer, max_out_len=1024)
142
+ )
143
+
144
+ lcb_test_output_eval_cfg = dict(
145
+ evaluator=dict(
146
+ type=LCBTestOutputEvaluator,
147
+ ),
148
+ pred_role='BOT',
149
+ )
150
+
151
+ LCBTestOutput_dataset = dict(
152
+ type=LCBTestOutputPredictionDataset,
153
+ abbr='lcb_test_output',
154
+ path='opencompass/test_generation',
155
+ reader_cfg=lcb_test_output_reader_cfg,
156
+ infer_cfg=lcb_test_output_infer_cfg,
157
+ eval_cfg=lcb_test_output_eval_cfg,
158
+ )
159
+
160
+ LCB_datasets = [
161
+ LCBCodeGeneration_dataset,
162
+ LCBCodeExecution_dataset,
163
+ LCBTestOutput_dataset,
164
+ ]
opencompass/configs/datasets/livecodebench/livecodebench_gen_b2b0fd.py ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
2
+ from opencompass.openicl.icl_retriever import ZeroRetriever
3
+ from opencompass.openicl.icl_inferencer import GenInferencer
4
+ from opencompass.datasets import (
5
+ LCBCodeGenerationDataset,
6
+ LCBCodeExecutionDataset,
7
+ LCBTestOutputPredictionDataset,
8
+ LCBCodeGenerationEvaluator,
9
+ LCBCodeExecutionEvaluator,
10
+ LCBTestOutputEvaluator
11
+ )
12
+ from opencompass.datasets.livecodebench import TestOutputPromptConstants
13
+
14
+
15
+ lcb_code_generation_reader_cfg = dict(
16
+ input_columns=[
17
+ 'question_content',
18
+ 'format_prompt',
19
+ ],
20
+ # output_column='evaluation_sample',
21
+ output_column='question_id',
22
+ )
23
+
24
+ SYSTEM_MESSAGE_GENERIC = 'You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.'
25
+
26
+ prompt_template = '### Question:\n{question_content}\n\n{format_prompt}' + \
27
+ '### Answer: (use the provided format with backticks)\n\n'
28
+
29
+
30
+ # Code Generation Tasks
31
+ lcb_code_generation_infer_cfg = dict(
32
+ prompt_template=dict(
33
+ type=PromptTemplate,
34
+ template=dict(
35
+ round=[
36
+ dict(
37
+ role='HUMAN',
38
+ prompt=prompt_template
39
+ )
40
+ ]
41
+ )
42
+ ),
43
+ retriever=dict(type=ZeroRetriever),
44
+ inferencer=dict(type=GenInferencer, max_out_len=1024)
45
+ )
46
+
47
+ lcb_code_generation_eval_cfg = dict(
48
+ evaluator=dict(
49
+ type=LCBCodeGenerationEvaluator,
50
+ num_process_evaluate=4,
51
+ timeout=6,
52
+ ),
53
+ pred_role='BOT',
54
+ )
55
+
56
+ LCBCodeGeneration_dataset = dict(
57
+ type=LCBCodeGenerationDataset,
58
+ abbr='lcb_code_generation',
59
+ path='opencompass/code_generation_lite',
60
+ reader_cfg=lcb_code_generation_reader_cfg,
61
+ infer_cfg=lcb_code_generation_infer_cfg,
62
+ eval_cfg=lcb_code_generation_eval_cfg
63
+ )
64
+
65
+ # Code Execution Dataset
66
+ lcb_code_execution_reader_cfg = dict(
67
+ input_columns=[
68
+ 'prompt',
69
+ ],
70
+ output_column='evaluation_sample',
71
+ )
72
+
73
+ lcb_code_execution_infer_cfg = dict(
74
+ prompt_template=dict(
75
+ type=PromptTemplate,
76
+ template=dict(
77
+ begin=[
78
+ dict(
79
+ role='SYSTEM',
80
+ prompt='You are an expert at Python programming, code execution, test case generation, and fuzzing.'
81
+ ),
82
+ ],
83
+ round=[
84
+ dict(
85
+ role='HUMAN',
86
+ prompt='{prompt}'
87
+ )
88
+ ]
89
+ )
90
+ ),
91
+ retriever=dict(type=ZeroRetriever),
92
+ inferencer=dict(type=GenInferencer, max_out_len=1024)
93
+ )
94
+
95
+ lcb_code_execution_eval_cfg = dict(
96
+ evaluator=dict(
97
+ type=LCBCodeExecutionEvaluator,
98
+ ),
99
+ pred_role='BOT',
100
+ )
101
+
102
+ LCBCodeExecution_dataset = dict(
103
+ type=LCBCodeExecutionDataset,
104
+ abbr='lcb_code_execution',
105
+ path='opencompass/execution-v2',
106
+ reader_cfg=lcb_code_execution_reader_cfg,
107
+ infer_cfg=lcb_code_execution_infer_cfg,
108
+ eval_cfg=lcb_code_execution_eval_cfg,
109
+ )
110
+
111
+ # Test Output Prediction Dataset
112
+ lcb_test_output_reader_cfg = dict(
113
+ input_columns=[
114
+ 'prompt',
115
+ ],
116
+ output_column='evaluation_sample',
117
+ )
118
+
119
+ system_prompt = 'You are an expert Python programmer. You will be given a question (problem specification) and will generate a correct Python program that matches the specification and passes all tests. You will NOT return anything except for the program.'
120
+
121
+ lcb_test_output_infer_cfg = dict(
122
+ prompt_template=dict(
123
+ type=PromptTemplate,
124
+ template=dict(
125
+ # begin=[
126
+ # dict(
127
+ # role='SYSTEM',
128
+ # prompt=system_prompt
129
+ # ),
130
+ # ],
131
+ round=[
132
+ dict(
133
+ role='HUMAN',
134
+ prompt='{prompt}'
135
+ )
136
+ ]
137
+ )
138
+ ),
139
+ retriever=dict(type=ZeroRetriever),
140
+ inferencer=dict(type=GenInferencer, max_out_len=1024)
141
+ )
142
+
143
+ lcb_test_output_eval_cfg = dict(
144
+ evaluator=dict(
145
+ type=LCBTestOutputEvaluator,
146
+ ),
147
+ pred_role='BOT',
148
+ )
149
+
150
+ LCBTestOutput_dataset = dict(
151
+ type=LCBTestOutputPredictionDataset,
152
+ abbr='lcb_test_output',
153
+ path='opencompass/test_generation',
154
+ reader_cfg=lcb_test_output_reader_cfg,
155
+ infer_cfg=lcb_test_output_infer_cfg,
156
+ eval_cfg=lcb_test_output_eval_cfg,
157
+ )
158
+
159
+ LCB_datasets = [
160
+ LCBCodeGeneration_dataset,
161
+ LCBCodeExecution_dataset,
162
+ LCBTestOutput_dataset,
163
+ ]
opencompass/configs/datasets/mbpp_plus/deprecated_mbpp_plus_gen_94815c.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
2
+ from opencompass.openicl.icl_retriever import ZeroRetriever
3
+ from opencompass.openicl.icl_inferencer import GenInferencer
4
+ from opencompass.datasets import MBPPEvaluator, MBPPPlusDataset
5
+
6
+ mbpp_plus_reader_cfg = dict(
7
+ input_columns=['text', 'test_list'], output_column='task_id')
8
+
9
+ mbpp_plus_infer_cfg = dict(
10
+ prompt_template=dict(
11
+ type=PromptTemplate,
12
+ template=dict(
13
+ round=[
14
+ dict(
15
+ role='HUMAN',
16
+ prompt=
17
+ 'You are an expert Python programmer, and here is your task: Write a function to find the shared elements from the given two lists. Your code should pass these tests:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\n assert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \n assert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n'
18
+ ),
19
+ dict(
20
+ role='BOT',
21
+ prompt=
22
+ "[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\n return tuple(set(test_tup1) & set(test_tup2))' \n[DONE] \n\n "
23
+ ),
24
+ dict(
25
+ role='HUMAN',
26
+ prompt=
27
+ 'You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests:\n\n assert is_not_prime(2) == False \n assert is_not_prime(10) == True \n assert is_not_prime(35) == True \n'
28
+ ),
29
+ dict(
30
+ role='BOT',
31
+ prompt=
32
+ "[BEGIN]\n 'import math\ndef is_not_prime(n):\n if n == 1:\n return True\n for i in range(2, int(math.sqrt(n))+1):\n if n % i == 0:\n return True\n return False' \n[DONE] \n\n "
33
+ ),
34
+ dict(
35
+ role='HUMAN',
36
+ prompt=
37
+ 'You are an expert Python programmer, and here is your task: Write a function to find the n largest integers from a given list of numbers, returned in descending order. Your code should pass these tests:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n'
38
+ ),
39
+ dict(
40
+ role='BOT',
41
+ prompt=
42
+ "[BEGIN]\n 'import heapq as hq\ndef heap_queue_largest(nums: list,n: int) -> list:\n largest_nums = hq.nlargest(n, nums)\n return largest_nums' \n[DONE] \n\n "
43
+ ),
44
+ dict(
45
+ role='HUMAN',
46
+ prompt=
47
+ 'You are an expert Python programmer, and here is your task: {text} Your code should pass these tests:\n\n {test_list} \n'
48
+ ),
49
+ dict(role='BOT', prompt='[BEGIN]\n'),
50
+ ], )),
51
+ retriever=dict(type=ZeroRetriever),
52
+ inferencer=dict(type=GenInferencer, max_out_len=512))
53
+
54
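+ # Completions follow the few-shot [BEGIN]/[DONE] format above; the evaluator extracts the code and runs the MBPPPlus tests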
+ mbpp_plus_eval_cfg = dict(evaluator=dict(type=MBPPEvaluator, metric='MBPPPlus'), pred_role='BOT')
55
+
56
+ mbpp_plus_datasets = [
57
+ dict(
58
+ type=MBPPPlusDataset,
59
+ abbr='mbpp_plus',
60
+ path='./data/mbpp_plus/mbpp_plus.jsonl',
61
+ reader_cfg=mbpp_plus_reader_cfg,
62
+ infer_cfg=mbpp_plus_infer_cfg,
63
+ eval_cfg=mbpp_plus_eval_cfg)
64
+ ]
opencompass/configs/datasets/mbpp_plus/mbpp_plus_gen_0b836a.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
2
+ from opencompass.openicl.icl_retriever import ZeroRetriever
3
+ from opencompass.openicl.icl_inferencer import GenInferencer
4
+ from opencompass.datasets import MBPPEvaluator, MBPPPlusDataset
5
+
6
+ mbpp_plus_reader_cfg = dict(
7
+ input_columns=['text', 'test_list'], output_column='task_id')
8
+
9
+ mbpp_plus_infer_cfg = dict(
10
+ prompt_template=dict(
11
+ type=PromptTemplate,
12
+ template=dict(
13
+ round=[
14
+ dict(
15
+ role='HUMAN',
16
+ prompt=
17
+ 'You are an expert Python programmer, and here is your task: Write a function to find the shared elements from the given two lists. Your code should pass these tests:\n\n assert similar_elements((3, 4, 5, 6),(5, 7, 4, 10)) == (4, 5)\nassert similar_elements((1, 2, 3, 4),(5, 4, 3, 7)) == (3, 4) \nassert similar_elements((11, 12, 14, 13),(17, 15, 14, 13)) == (13, 14) \n'
18
+ ),
19
+ dict(
20
+ role='BOT',
21
+ prompt=
22
+ "[BEGIN]\n 'def similar_elements(test_tup1, test_tup2):\n return tuple(set(test_tup1) & set(test_tup2))' \n[DONE] \n\n "
23
+ ),
24
+ dict(
25
+ role='HUMAN',
26
+ prompt=
27
+ 'You are an expert Python programmer, and here is your task: Write a python function to identify non-prime numbers. Your code should pass these tests:\n\n assert is_not_prime(2) == False \nassert is_not_prime(10) == True \nassert is_not_prime(35) == True \n'
28
+ ),
29
+ dict(
30
+ role='BOT',
31
+ prompt=
32
+ "[BEGIN]\n 'import math\ndef is_not_prime(n):\n if n == 1:\n return True\n for i in range(2, int(math.sqrt(n))+1):\n if n % i == 0:\n return True\n return False' \n[DONE] \n\n "
33
+ ),
34
+ dict(
35
+ role='HUMAN',
36
+ prompt=
37
+ 'You are an expert Python programmer, and here is your task: Write a function to find the n largest integers from a given list of numbers, returned in descending order. Your code should pass these tests:\n\n assert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],3)==[85, 75, 65] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],2)==[85, 75] \nassert heap_queue_largest( [25, 35, 22, 85, 14, 65, 75, 22, 58],5)==[85, 75, 65, 58, 35] \n'
38
+ ),
39
+ dict(
40
+ role='BOT',
41
+ prompt=
42
+ "[BEGIN]\n 'import heapq as hq\ndef heap_queue_largest(nums: list,n: int) -> list:\n largest_nums = hq.nlargest(n, nums)\n return largest_nums' \n[DONE] \n\n "
43
+ ),
44
+ dict(
45
+ role='HUMAN',
46
+ prompt=
47
+ 'You are an expert Python programmer, and here is your task: {text} Your code should pass these tests:\n\n {test_list} \n'
48
+ ),
49
+ dict(role='BOT', prompt='[BEGIN]\n'),
50
+ ], )),
51
+ retriever=dict(type=ZeroRetriever),
52
+ inferencer=dict(type=GenInferencer, max_out_len=512))
53
+
54
+ mbpp_plus_eval_cfg = dict(evaluator=dict(type=MBPPEvaluator, metric='MBPPPlus'), pred_role='BOT')
55
+
56
+ mbpp_plus_datasets = [
57
+ dict(
58
+ type=MBPPPlusDataset,
59
+ abbr='mbpp_plus',
60
+ path='./data/mbpp_plus/mbpp_plus.jsonl',
61
+ reader_cfg=mbpp_plus_reader_cfg,
62
+ infer_cfg=mbpp_plus_infer_cfg,
63
+ eval_cfg=mbpp_plus_eval_cfg)
64
+ ]
opencompass/configs/datasets/nq_cn/nqcn_gen.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ from mmengine.config import read_base
2
+
3
+ with read_base():
4
+ from .nqcn_gen_141737 import nqcn_datasets # noqa: F401, F403
opencompass/configs/datasets/nq_cn/nqcn_gen_141737.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
2
+ from opencompass.openicl.icl_retriever import ZeroRetriever
3
+ from opencompass.openicl.icl_inferencer import GenInferencer
4
+ from opencompass.datasets import NaturalQuestionDatasetCN, NQEvaluatorCN
5
+
6
+ nqcn_reader_cfg = dict(
7
+ input_columns=['question'], output_column='answer', train_split='test'
8
+ )
9
+
10
+ nqcn_infer_cfg = dict(
11
+ prompt_template=dict(
12
+ type=PromptTemplate,
13
+ template=dict(
14
+ round=[
15
+ dict(role='HUMAN', prompt='问题: {question}?\n答案是:'),
16
+ ],
17
+ ),
18
+ ),
19
+ retriever=dict(type=ZeroRetriever),
20
+ inferencer=dict(type=GenInferencer),
21
+ )
22
+
23
+ nqcn_eval_cfg = dict(evaluator=dict(type=NQEvaluatorCN), pred_role='BOT')
24
+
25
+ nqcn_datasets = [
26
+ dict(
27
+ abbr='nq_cn',
28
+ type=NaturalQuestionDatasetCN,
29
+ path='./data/nq_cn',
30
+ reader_cfg=nqcn_reader_cfg,
31
+ infer_cfg=nqcn_infer_cfg,
32
+ eval_cfg=nqcn_eval_cfg,
33
+ )
34
+ ]
opencompass/configs/datasets/race/race_cot_gen_d95929.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from opencompass.openicl.icl_prompt_template import PromptTemplate
2
+ from opencompass.openicl.icl_retriever import ZeroRetriever
3
+ from opencompass.openicl.icl_inferencer import GenInferencer
4
+ from opencompass.openicl.icl_evaluator import AccEvaluator
5
+ from opencompass.datasets import RaceDataset
6
+ from opencompass.utils.text_postprocessors import (
7
+ first_option_postprocess,
8
+ )
9
+
10
+ QUERY_TEMPLATE = """
11
+ Answer the following multiple choice question. The last line of your response should be of the following format: 'ANSWER: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering.
12
+
13
+ Article: {article}
14
+
15
+ Q: {question}
16
+
17
+ A. {A}
18
+ B. {B}
19
+ C. {C}
20
+ D. {D}
21
+ """.strip()
22
+
23
+ race_reader_cfg = dict(
24
+ input_columns=['article', 'question', 'A', 'B', 'C', 'D'],
25
+ output_column='answer',
26
+ train_split='validation',
27
+ test_split='test',
28
+ )
29
+
30
+ race_infer_cfg = dict(
31
+ prompt_template=dict(
32
+ type=PromptTemplate,
33
+ template=dict(
34
+ round=[
35
+ dict(role='HUMAN', prompt=QUERY_TEMPLATE),
36
+ ]
37
+ ),
38
+ ),
39
+ retriever=dict(type=ZeroRetriever),
40
+ inferencer=dict(type=GenInferencer),
41
+ )
42
+
43
+ race_eval_cfg = dict(
44
+ evaluator=dict(type=AccEvaluator),
45
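+ # Pull the first option letter (A-D) out of the model's chain-of-thought answer before accuracy scoring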
+ pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'),
46
+ pred_role='BOT',
47
+ )
48
+
49
+ race_datasets = [
50
+ dict(
51
+ abbr='race-middle',
52
+ type=RaceDataset,
53
+ path='opencompass/race',
54
+ name='middle',
55
+ reader_cfg=race_reader_cfg,
56
+ infer_cfg=race_infer_cfg,
57
+ eval_cfg=race_eval_cfg,
58
+ ),
59
+ dict(
60
+ abbr='race-high',
61
+ type=RaceDataset,
62
+ path='opencompass/race',
63
+ name='high',
64
+ reader_cfg=race_reader_cfg,
65
+ infer_cfg=race_infer_cfg,
66
+ eval_cfg=race_eval_cfg,
67
+ ),
68
+ ]