Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Chinese_Movie_and_Music_Recommendation_Translate-EN.txt +40 -0
- opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Chinese_Natural_Language_Inference_Translate-EN.txt +28 -0
- opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Chinese_Reading_Comprehension_Translate-EN.txt +26 -0
- opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Chinese_Sequence_Understanding_Translate-EN.txt +25 -0
- opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Chinese_Sport_Understanding_Translate-EN.txt +22 -0
- opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Chinese_Time_Understanding_Translate-EN.txt +28 -0
- opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Global_Anachronisms_Judgment_Translate-EN.txt +25 -0
- opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Global_Movie_and_Music_Recommendation_Translate-EN.txt +40 -0
- opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Global_Natural_Language_Inference_Translate-EN.txt +28 -0
- opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Global_Reading_Comprehension_Translate-EN.txt +25 -0
- opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Global_Sequence_Understanding_Translate-EN.txt +25 -0
- opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Global_Sport_Understanding_Translate-EN.txt +22 -0
- opencompass/configs/datasets/infinitebench/infinitebenchcodedebug/infinitebench_codedebug_gen.py +4 -0
- opencompass/configs/datasets/infinitebench/infinitebenchcodedebug/infinitebench_codedebug_gen_276a42.py +43 -0
- opencompass/configs/datasets/infinitebench/infinitebenchcoderun/infinitebench_coderun_gen.py +4 -0
- opencompass/configs/datasets/infinitebench/infinitebenchcoderun/infinitebench_coderun_gen_1a76bd.py +43 -0
- opencompass/configs/datasets/infinitebench/infinitebenchendia/infinitebench_endia_gen.py +4 -0
- opencompass/configs/datasets/infinitebench/infinitebenchendia/infinitebench_endia_gen_c96eb5.py +40 -0
- opencompass/configs/datasets/infinitebench/infinitebenchenmc/infinitebench_enmc_gen.py +4 -0
- opencompass/configs/datasets/infinitebench/infinitebenchenmc/infinitebench_enmc_gen_3a4102.py +43 -0
- opencompass/configs/datasets/infinitebench/infinitebenchenqa/infinitebench_enqa_gen.py +4 -0
- opencompass/configs/datasets/infinitebench/infinitebenchenqa/infinitebench_enqa_gen_a1640c.py +40 -0
- opencompass/configs/datasets/infinitebench/infinitebenchensum/infinitebench_ensum_gen.py +4 -0
- opencompass/configs/datasets/infinitebench/infinitebenchensum/infinitebench_ensum_gen_cfbc08.py +41 -0
- opencompass/configs/datasets/infinitebench/infinitebenchmathcalc/infinitebench_mathcalc_gen.py +4 -0
- opencompass/configs/datasets/infinitebench/infinitebenchmathcalc/infinitebench_mathcalc_gen_78d17e.py +40 -0
- opencompass/configs/datasets/infinitebench/infinitebenchmathfind/infinitebench_mathfind_gen.py +4 -0
- opencompass/configs/datasets/infinitebench/infinitebenchmathfind/infinitebench_mathfind_gen_6d799e.py +43 -0
- opencompass/configs/datasets/infinitebench/infinitebenchretrievekv/infinitebench_retrievekv_gen.py +4 -0
- opencompass/configs/datasets/infinitebench/infinitebenchretrievekv/infinitebench_retrievekv_gen_06b3ac.py +40 -0
- opencompass/configs/datasets/infinitebench/infinitebenchretrievenumber/infinitebench_retrievenumber_gen.py +4 -0
- opencompass/configs/datasets/infinitebench/infinitebenchretrievenumber/infinitebench_retrievenumber_gen_047436.py +43 -0
- opencompass/configs/datasets/infinitebench/infinitebenchretrievepasskey/infinitebench_retrievepasskey_gen.py +4 -0
- opencompass/configs/datasets/infinitebench/infinitebenchretrievepasskey/infinitebench_retrievepasskey_gen_62ff68.py +43 -0
- opencompass/configs/datasets/infinitebench/infinitebenchzhqa/infinitebench_zhqa_gen.py +4 -0
- opencompass/configs/datasets/infinitebench/infinitebenchzhqa/infinitebench_zhqa_gen_1e5293.py +41 -0
- opencompass/configs/datasets/lveval/lvevalcmrc_mixup/lveval_cmrc_mixup_gen.py +6 -0
- opencompass/configs/datasets/lveval/lvevalcmrc_mixup/lveval_cmrc_mixup_gen_465823.py +54 -0
- opencompass/configs/datasets/lveval/lvevaldureader_mixup/lveval_dureader_mixup_gen.py +6 -0
- opencompass/configs/datasets/lveval/lvevaldureader_mixup/lveval_dureader_mixup_gen_465823.py +55 -0
- opencompass/configs/datasets/lveval/lvevalfactrecall_en/lveval_factrecall_en_gen.py +6 -0
- opencompass/configs/datasets/lveval/lvevalfactrecall_en/lveval_factrecall_en_gen_9a836f.py +54 -0
- opencompass/configs/datasets/lveval/lvevalfactrecall_zh/lveval_factrecall_zh_gen.py +6 -0
- opencompass/configs/datasets/lveval/lvevalfactrecall_zh/lveval_factrecall_zh_gen_dbee70.py +54 -0
- opencompass/configs/datasets/lveval/lvevalhotpotwikiqa_mixup/lveval_hotpotwikiqa_mixup_gen.py +6 -0
- opencompass/configs/datasets/lveval/lvevalhotpotwikiqa_mixup/lveval_hotpotwikiqa_mixup_gen_77ce82.py +59 -0
- opencompass/configs/datasets/lveval/lvevallic_mixup/lveval_lic_mixup_gen.py +6 -0
- opencompass/configs/datasets/lveval/lvevallic_mixup/lveval_lic_mixup_gen_01eb0c.py +54 -0
- opencompass/configs/datasets/lveval/lvevalloogle_CR_mixup/lveval_loogle_CR_mixup_gen.py +6 -0
- opencompass/configs/datasets/lveval/lvevalloogle_CR_mixup/lveval_loogle_CR_mixup_gen_d7ea36.py +54 -0
opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Chinese_Movie_and_Music_Recommendation_Translate-EN.txt
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Based on the given list of artworks, identify the most similar one.
|
2 |
+
|
3 |
+
Q: The movies that have something in common with these movies Crazy Aliens, Bullitt, Killjoys and Crazy Stone are:
|
4 |
+
Options:
|
5 |
+
(A) 'Lost in Thailand'
|
6 |
+
(B)The Young and the Restless
|
7 |
+
(C)The Children of the River and the Lake
|
8 |
+
(D)The Mekong Operation
|
9 |
+
A: Let's think step by step.
|
10 |
+
Crazy Aliens is a comedy sci-fi film directed by Ning Hao, written by Liu Cixin and Sun Xiaohang, and starring Huang Bo, Shen Teng and Xu Zheng, released in 2019.
|
11 |
+
Bullfight is a black comedy film directed by Guan Hu and starring Huang Bo and Yan Ni, released in 2009.
|
12 |
+
Killing Life is an absurd suspense comedy film directed by Guan Hu and co-starring Huang Bo, Ren Dahua, Su Youpeng and Yu Nan, released in 2012.
|
13 |
+
Crazy Stone is a black comedy film directed by Ning Hao and starring Guo Tao, Liu Hua, Lian Jin, Liu Gang, Xu Zheng and Huang Bo, released in 2006.
|
14 |
+
These are all famous classic Chinese comedy movies with Huang Bo in them, and out of all the options, the only movie that seems to have something in common with these movies is Lost in Thailand (a comedy film directed by Xu Zheng and starring Huang Bo, Xu Zheng and Wang Baoqiang, released in 2012). So the answer is (A).
|
15 |
+
|
16 |
+
Q: The movie that has something in common with these movies 'Red Sorghum', 'Alive', 'Hanging the Red Lantern High' and 'Hero' is:
|
17 |
+
Options:
|
18 |
+
(A) 'Not One Less'
|
19 |
+
(B)Let the Bullets Fly
|
20 |
+
(C)The Legend of Ah Fei
|
21 |
+
(D)East is East, West is West
|
22 |
+
A: Let's think step by step.
|
23 |
+
Red Sorghum, a war literary film directed by Zhang Yimou and starring Jiang Wen, Gong Li and Teng Rujun, was released in China in 1987.
|
24 |
+
Alive, a drama film directed by Zhang Yimou and starring Ge You and Gong Li, was released in China in 1994.
|
25 |
+
Hanging the Red Lantern High is a drama film directed by Zhang Yimou and starring Gong Li, He Saifei, Ma Jingwu, Cao CuiFen, Kong Lin, and Jin Shuyuan, released in China in 1991.
|
26 |
+
Hero is a martial arts film directed by Zhang Yimou and starring Jet Li, Tony Leung, Maggie Cheung, Chen Dao Ming, Zhang Ziyi and Donnie Yen, released in China in 2002.
|
27 |
+
These are all famous classic Chinese movies directed by Zhang Yimou. Among all the choices, the only movie that seems to have something in common with these movies is Not One Less (a drama movie directed by Zhang Yimou and starring Wei Minzhi and Zhang Huike, released in 1999). So the answer is (A).
|
28 |
+
|
29 |
+
Q: The songs that have something in common with these songs 'Nocturne', 'Materia Medica', 'Listen to Your Mother' and 'Seven Miles' are:
|
30 |
+
Option.
|
31 |
+
(A) 'Nunchaku'
|
32 |
+
(B) 'The Young and the Restless'
|
33 |
+
(C) 'Pomp and Circumstance'
|
34 |
+
(D) "Three's a Crowd
|
35 |
+
A: Let's think step by step.
|
36 |
+
Nocturne is a song performed by Jay Chou, written by Fong, composed by Jay Chou, and arranged by Lin Maike, and included in Jay Chou's 2005 album November's Chopin.
|
37 |
+
"Materia Medica" is a song by Jay Chou, written by Fong, composed by Jay Chou, and arranged by Michael Lam, and featured on Jay Chou's 2006 album Still Vantage Point.
|
38 |
+
"Listen to Your Mother" is a song by Jay Chou, composed by Jay Chou and arranged by Lin Maike and Hong Jingyao, included in Jay Chou's 2006 album "Still Vantage".
|
39 |
+
Seven Miles is a song performed by Jay Chou, with lyrics by Fong, music by Jay Chou, and arrangement by Chung Hsing-min, included in Jay Chou's 2004 album of the same name, Seven Miles.
|
40 |
+
These are all famous Chinese pop songs sung by Jay Chou. Among all the choices, the only song that seems to have something in common with these songs is "Nunchaku" (sung by Jay Chou, composed by Jay Chou, lyrics by Fong Wen-shan, arranged by Chung Hsing-min, and featured in Jay Chou's 2001 album "Fantasia"). So the answer is (A).
|
opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Chinese_Natural_Language_Inference_Translate-EN.txt
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Choose the correct answer based on the relationship between the two sentences in the question.
|
2 |
+
|
3 |
+
Q:Statement 1: Xiaoming has just gone ice-skating in Shichahai
|
4 |
+
Statement 2: It's winter in Beijing
|
5 |
+
What is the relationship between these two statements?
|
6 |
+
(A) Implicit
|
7 |
+
(B) Contradictory
|
8 |
+
(C) Irrelevant
|
9 |
+
A: Let's think step by step.
|
10 |
+
The first sentence mentions that Xiaoming went ice-skating in Shichahai, which is usually done in winter, and Shichahai is located in Beijing, which implies the information in the second sentence, that is, it is winter at present. So the answer is (A).
|
11 |
+
|
12 |
+
Q:Statement 1:Next week, a heavy rainfall will affect the whole Hebei province
|
13 |
+
Statement 2: Next week, the weather in Shanghai will be fine.
|
14 |
+
What is the relationship between these two statements?
|
15 |
+
(A) Implied
|
16 |
+
(B) Contradictory
|
17 |
+
(C) Irrelevant
|
18 |
+
A: Let's think step by step.
|
19 |
+
These two sentences describe the weather in two geographical locations, one in Hebei Province and the other in Shanghai. Hebei Province and Shanghai are geographically far apart, so the weather conditions in these two places are not necessarily directly related. Therefore, the relationship between the two sentences is irrelevant. So the answer is (C).
|
20 |
+
|
21 |
+
Q:Statement 1: Yesterday, Xiao Ming watched the sunset on a hill and exclaimed, "The sunset is infinite, but it's just near dusk"
|
22 |
+
Statement 2: Yesterday it rained and Ming didn't go out
|
23 |
+
What is the relationship between these two statements?
|
24 |
+
(A) implied
|
25 |
+
(B) contradictory
|
26 |
+
(C) Irrelevant
|
27 |
+
A: Let's think step by step.
|
28 |
+
The first sentence says that Ming saw the sunset on the mountain yesterday, while the second sentence says that it rained yesterday and Ming didn't go out. There is a contradiction between these two sentences because if Ming didn't go out, then he couldn't have seen the sunset on the mountain. So there is a contradiction between these two sentences. So the answer is (B).
|
opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Chinese_Reading_Comprehension_Translate-EN.txt
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Please understand the meaning of the question and choose the correct answer.
|
2 |
+
|
3 |
+
Q:Some Cantonese people don't like chili peppers. Therefore, some southerners don't like chili peppers. Which of the following ensures the validity of the above argument?
|
4 |
+
(A) Some Cantonese people love chili peppers
|
5 |
+
(B) Some Southerners love chili peppers.
|
6 |
+
(C) All Cantonese are Southerners.
|
7 |
+
(D) Some Cantonese people do not love chili or sweets.
|
8 |
+
A: Let's think step by step.
|
9 |
+
In this argument, we infer from "some Cantonese don't like chili peppers" that "some Southerners don't like chili peppers". This inference depends on the relationship between Cantonese and Southerners. In order for this inference to be valid, we need to make sure that at least some Cantonese are Southerners. Therefore, option (C) "All Cantonese are southerners" is the one that ensures that this argument is valid. So the answer is (C).
|
10 |
+
|
11 |
+
|
12 |
+
Q:Thangka is a form of painting rich in Tibetan cultural characteristics, which has a history of more than 1300 years since the rise of the Tubo Dynasty and is a cultural treasure of the Snowy Plateau. Its subject matter is not only religious, but also historical and folkloric, so it is also known as an "encyclopedia" for understanding Tibet. Therefore, if you want to understand the history of Tibet, in addition to sitting down and reading the rigorous but slightly dull history books, you can also choose a pleasant and enjoyable way - enjoying thangkas, conversing with many ancient people, imagining historical events, savoring the rich spiritual world of the Tibetans, and understanding the unique folklore of the Tibetans, a process that makes history three-dimensional and palpable. What is the main intention of this passage?
|
13 |
+
(A) Thangkas can provide a cozy and relaxing way to learn about Tibet.
|
14 |
+
(B) The thangkas are a unique record of Tibetan folklore and historical events.
|
15 |
+
(C) The thangka is an "encyclopedic" form of painting for understanding Tibetan culture and history.
|
16 |
+
(D) The thangka is a form of painting that is rich in Tibetan cultural characteristics and has a long history.
|
17 |
+
A: Let's think step by step.
|
18 |
+
It is clearly mentioned in the text that apart from reading rigorous but dull history books, appreciating thangkas is a pleasant and enjoyable way to talk with many ancient people, imagine historical events, experience the rich spiritual world of the Tibetans, and learn about the unique Tibetan folklore. So the main intention of the passage is (A) "Thangkas can provide a cozy and relaxing way to learn about Tibet." So the answer is (A).
|
19 |
+
|
20 |
+
Q:As a principle and method of literary criticism, the concept of "knowing people and discussing the world" was first proposed by Mencius, a thinker of the Warring States period. According to Mencius, in order to make friends with the ancients, it is not enough just to read their poems and books, but also to understand their behavior and the times in which they lived, so as to read and understand their poems and books, and to make friends with them and become their soulmates. What is the incorrect understanding of this passage?
|
21 |
+
(A) People's hearts can communicate and resonate with each other.
|
22 |
+
(B) "Knowing people and discussing the world" has been used as a kind of literary criticism and has shown a strong vitality.
|
23 |
+
(C) "Knowing people and discussing the world" can help the descendants to make friends with the ancients and the ancients to become their soulmates.
|
24 |
+
(D) Knowing about the ancients and their times helps to understand their works.
|
25 |
+
A: Let us think step by step
|
26 |
+
From the passage, we fail to see that (B) "Knowing the people of the world" has been used as a kind of literary criticism and has shown a strong vitality. Although it is true that "knowing people and discussing the world" is a principle and method of literary criticism put forward by the philosopher Mencius during the Warring States period, the passage does not mention that "knowing people and discussing the world" is still in use today, nor does it mention that it has shown great vitality. Therefore, option (B) is a misconception. So the answer is (B).
|
opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Chinese_Sequence_Understanding_Translate-EN.txt
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Choose the correct answer based on the context
|
2 |
+
|
3 |
+
Q: The following characters are correctly ordered in chronological order? Options:
|
4 |
+
(A) Qin Shi Huang, Zhuge Liang, Liu Bang, Bai Juyi
|
5 |
+
(B) Zhuge Liang, Qin Shi Huang, Liu Bang, Bai Ju Yi
|
6 |
+
(C) Qin Shi Huang, Liu Bang, Zhu Geliang, Bai Juyi
|
7 |
+
(D) Bai Juyi, Zhu Geliang, Liu Bang, Qin Shi Huang
|
8 |
+
A: Let's think step by step.
|
9 |
+
There are 4 characters mentioned in the options, in which Qin Shi Huang is from Qin Dynasty, Zhuge Liang is from Three Kingdoms period, Liu Bang is from Han Dynasty period and Bai Juyi is from Tang Dynasty period, according to the chronological order they are Qin Dynasty, Han Dynasty, Three Kingdoms period and Tang Dynasty, so the characters are sequentially Qin Shi Huang, Liu Bang, Zhuge Liang and Bai Juyi. So the answer is (C).
|
10 |
+
|
11 |
+
Q:The following words describing age are correctly ordered from youngest to oldest age? Options:
|
12 |
+
(A) Unconfused, Established, Knowledge of life, Flowering age
|
13 |
+
(B) To stand, not to be confused, to know one's destiny, and to be in the prime of life.
|
14 |
+
(C) Being in the prime of life, knowing one's destiny, being in the prime of life, not being confused.
|
15 |
+
(D) to stand up, to grow old, to be unperturbed, to know one's destiny
|
16 |
+
A: Let us think step by step.
|
17 |
+
The options mention four words describing age. "Established" means 30 years old, "not perplexed" means 40 years old, "knowing one's destiny" means 50 years old, and "flowering age" means 60 years old. Therefore, the order of age from youngest to oldest is "to be established", "not to be perplexed", "to know one's destiny" and "flowering age". So the answer is (B).
|
18 |
+
|
19 |
+
Q:Which of the following steps is correct for making old-fashioned lollipops? Options:
|
20 |
+
(A) Preparing the ingredients, pouring the syrup into the molds, making the syrup, cooling to set the shape
|
21 |
+
(B) Prepare ingredients, make syrup, pour syrup into molds, cool to set
|
22 |
+
(C) Prepare ingredients, pour syrup into mold, cool and set, make syrup
|
23 |
+
(D) Prepare ingredients, cool and set, make syrup, pour syrup into molds
|
24 |
+
A: Let's think step by step.
|
25 |
+
The question refers to the steps in making old-fashioned lollipops, and the options include "prepare ingredients", "pour syrup into molds", "make syrup", and "cool to set". According to the steps of making old-fashioned lollipops, the first step is to prepare the ingredients, then you need to make the syrup, then pour the syrup into the molds, and finally cool and set the lollipops. So the answer is (B).
|
opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Chinese_Sport_Understanding_Translate-EN.txt
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Determine whether a human-constructed sentence about sports is plausible.
|
2 |
+
|
3 |
+
Q: Is the following sentence plausible? "Athlete Zhang Peimeng's consistent passes made the offensive end take the initiative"
|
4 |
+
Options:
|
5 |
+
(A) Credible
|
6 |
+
(B) Not credible
|
7 |
+
A: Let's think step by step.
|
8 |
+
Zhang Peimeng is a Chinese track and field athlete, and first pass is a volleyball term. Since he is not a volleyball player, he would not be involved in the passing part of a volleyball game. So the answer is (B).
|
9 |
+
|
10 |
+
Q: Is the following sentence plausible? "Athlete Guo Allen scored a free kick."
|
11 |
+
Options:
|
12 |
+
(A) Credible
|
13 |
+
(B) Unbelievable
|
14 |
+
A: Let's think step by step.
|
15 |
+
Guo Allen is a Chinese basketball player, and free kick is a soccer term. Since he is not a soccer player, he won't be involved in the free kick part of a soccer game, so the answer is (B).
|
16 |
+
|
17 |
+
Q: Is the following sentence plausible? "The athlete Fan Zhiyi scored a goal."
|
18 |
+
Options:
|
19 |
+
(A) Credible
|
20 |
+
(B) Not credible
|
21 |
+
A: Let's think step by step.
|
22 |
+
Fan Zhiyi is a Chinese soccer player, and scoring a goal is a soccer sport term. Since he is a professional soccer player, he is likely to score a goal, so the answer is (A).
|
opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Chinese_Time_Understanding_Translate-EN.txt
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Choose the correct answer based on the context.
|
2 |
+
|
3 |
+
Q: Xiaohua was born in the Bingshen year and his brother is 6 years older than him, so in which year was his brother born?
|
4 |
+
Options:
|
5 |
+
(A) Year of Ji Mao
|
6 |
+
(B) The year of Geng Yin
|
7 |
+
(C) Bingshen year
|
8 |
+
(D) The year of Yi Chou
|
9 |
+
A: Let's think step by step.
|
10 |
+
Xiaohua was born in the Bingshen year. His brother is six years older than him, and according to the Chinese Heavenly Stems and Earthly Branches dating system, the sixth year before the Bingshen year is the Gengyin year. So the answer is (B).
|
11 |
+
|
12 |
+
Q: If this is the year of A-chen, and Li Hua's grandfather said, "This is the year I know my destiny," what is Li Hua's grandfather's zodiac sign?
|
13 |
+
Options:
|
14 |
+
(A) Dog
|
15 |
+
(B) Tiger
|
16 |
+
(C) Rooster
|
17 |
+
(D) Rat
|
18 |
+
A: Let's think step by step.
|
19 |
+
In the title, it is mentioned that grandpa is "in the year of knowing his destiny". In ancient China, "in the year of knowing his destiny" refers to the age of 50, so grandpa is 50 years old this year, and this year is the year of A-chen. According to the method of chronology of the Chinese Heavenly Stems and Earthly Branches, the year of grandpa's birth was the year of A-yin, and the year of A-yin is the year of the Tiger, so grandpa is a Tiger. The year he was born is the year of the tiger, so he is a tiger. So the answer is (B).
|
20 |
+
|
21 |
+
Q: Li started making a drawing at three quarters past the Hai hour in the evening, and he knew that it would take him 10 hours to finish it. So what is the earliest hour at which he can finish it?
|
22 |
+
Options:
|
23 |
+
(A) Tatsu
|
24 |
+
(B) U.M. hour
|
25 |
+
(C) Midday
|
26 |
+
(D) the hour of midnight
|
27 |
+
A: Let's think step by step.
|
28 |
+
According to the ancient Chinese method of timekeeping, three quarters past the Hai hour is approximately 21:45, and 10 hours later it is 7:45, which falls within the Chen hour. So the answer is (A).
|
opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Global_Anachronisms_Judgment_Translate-EN.txt
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Identify whether a given statement contains an era error.
|
2 |
+
|
3 |
+
Q: How would a person with compulsory education and basic common sense answer whether the following statement contains an anachronism?
|
4 |
+
During the landing operations on Iwo Jima, Ralph spoke loudly into the radio.
|
5 |
+
Options:
|
6 |
+
(A) Yes
|
7 |
+
(B) No
|
8 |
+
A: Let's think step by step.
|
9 |
+
The statement refers to "during the Iwo Jima landings," which is a historical event during World War II. At that time, it was accurate and appropriate for military personnel to use radios to communicate with each other. So the answer is (B).
|
10 |
+
|
11 |
+
Q: Does the following statement contain an anachronism and how would a person with a compulsory education and basic common sense answer?
|
12 |
+
During the landing operations on Iwo Jima, Ralph speaks loudly into his iPhone.
|
13 |
+
Options:
|
14 |
+
(A) Yes
|
15 |
+
(B) No
|
16 |
+
A: Let's think step by step.
|
17 |
+
The statement mentions "during the Iwo Jima landings," which refers to a historical event during World War II. However, the reference to Ralph speaking loudly into his iPhone introduces an anachronism; the iPhone is a modern smartphone that did not exist during the Iwo Jima landings in 1945. So the answer is (A).
|
18 |
+
|
19 |
+
Q: How would a person with compulsory education and basic common sense answer whether the following statement contains an anachronism?
|
20 |
+
Nothing is more satisfying than Sally Hemmings knitting a sweater while watching HBO's True Detective.
|
21 |
+
Options:
|
22 |
+
(A) Yes
|
23 |
+
(B) No
|
24 |
+
A: Let's think step by step.
|
25 |
+
This statement mentions Sally Hemings, an enslaved woman in late 18th and early 19th century America. However, the reference to her knitting while watching HBO's True Detective introduces an anachronism. Television did not exist in the time period in which Sally Hemings lived, and the specific reference to watching a particular program like True Detective is clearly inappropriate for that time period in history. So the answer is (A).
|
opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Global_Movie_and_Music_Recommendation_Translate-EN.txt
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Based on the given list of artworks, identify the most similar one.
|
2 |
+
|
3 |
+
Q: Find a movie similar to Braveheart, Pretty Woman, Schindler's List, Apollo 13:
|
4 |
+
Options:
|
5 |
+
(A) Star Trek.
|
6 |
+
(B) My Grand Greek Wedding 2
|
7 |
+
(C)The Santa Clause 2
|
8 |
+
(D) Dances with Wolves
|
9 |
+
A: Let's think step by step.
|
10 |
+
Star Trek is a science fiction movie, and while it has depth and complexity, its themes and style are quite different from the four movies mentioned above.
|
11 |
+
My Big Fat Greek Wedding 2 is a light-hearted comedy movie that differs considerably in theme and style from the four movies mentioned above.
|
12 |
+
Santa Claus 2 is a Christmas-themed family movie that also differs considerably in theme and style from the four movies mentioned above.
|
13 |
+
Dances with Wolves is a movie depicting the relationship with Native Americans during the pioneering period of the American West.
|
14 |
+
The theme and style of one of the movies, Dances with Wolves, matches the four movies mentioned above. This movie, like Braveheart, Pretty Woman, Schindler's List, and Apollo 13, is a historical movie with depth and seriousness. So the answer is (D).
|
15 |
+
|
16 |
+
Q: Find a movie similar to Braveheart, Pretty Woman, Apollo 13, Dances with Wolves:
|
17 |
+
Options:
|
18 |
+
(A) Batman: Assault on Arkham
|
19 |
+
(B)The Shawshank Redemption
|
20 |
+
(C) "Toy Story
|
21 |
+
(D) The Lion King
|
22 |
+
A: Let's think step by step.
|
23 |
+
Batman: Assault on Arkham is a superhero movie whose theme and style are quite different from the four movies mentioned above.
|
24 |
+
The Shawshank Redemption is a 1994 American drama film directed by Frank Darabont and starring Tim Robbins and Morgan Freeman; it's a movie about hope and perseverance.
|
25 |
+
Toy Story is an animated movie, and while it may have some themes of adventure and friendship, its themes and style differ considerably from the four movies mentioned above.
|
26 |
+
The Lion King, while it is a classic animated movie that covers themes of courage and growing up, has themes and styles that differ considerably from the four movies mentioned above.
|
27 |
+
One of the movies, The Shawshank Redemption, like Braveheart, Pretty Woman, Apollo 13, and Dances with Wolves, is a movie with depth and seriousness and has a similar theme and style to the other three movies. So the answer is (B).
|
28 |
+
|
29 |
+
Q: Find a movie similar to Astonishing, Braveheart, Lowlife, Schindler's List:
|
30 |
+
Options:
|
31 |
+
(A) The Cabin of Dr. Caligari
|
32 |
+
(B)The Shawshank Redemption
|
33 |
+
(C) 'Spider-Man 2'
|
34 |
+
(D)Taxi
|
35 |
+
A: Let's think step by step.
|
36 |
+
The Cabin of Dr. Caligari is a 1920 German Expressionist silent film directed by Robert Wiene. It is often considered a landmark work of German Expressionist cinema and one of the earliest horror films.
|
37 |
+
The Shawshank Redemption is a 1994 American drama film directed by Frank Darabont and starring Tim Robbins and Morgan Freeman; it is a movie about hope and perseverance.
|
38 |
+
Spider-Man 2 is a 2004 American superhero film directed by Sam Raimi and starring Tobey Maguire as the second installment of the Spider-Man trilogy.
|
39 |
+
Taxi is a 2004 American comedy-action film directed by Tim Story and starring Jimmy Fallon and Queen Latifah. The movie is an American remake of the 1998 French film.
|
40 |
+
And the titles, "The Thing," "Braveheart," "Lowlife," and "Schindler's List" are some very deep, richly plotted movies with strong human themes. They have all won high praise from audiences and critics for their excellent scripts, brilliant performances and profound themes. Among the options is The Shawshank Redemption, which tells the story of how a wrongly accused banker keeps hope alive in prison and eventually escapes. With its deeply moving plot and profound depiction of human nature, this movie has many similarities with the movie in the title. So the answer is (B).
|
opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Global_Natural_Language_Inference_Translate-EN.txt
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Choose the correct answer based on the relationship between the two sentences in the question.
|
2 |
+
|
3 |
+
Q:Statement 1: But the old man carefully picked the snake away from the grass, letting it crawl towards the grass, mumbling: sin, sin, this was your home!
|
4 |
+
Statement 2: The old man was very sad in his heart.
|
5 |
+
What is the relationship between these two statements?
|
6 |
+
(A) Contained
|
7 |
+
(B) contradictory
|
8 |
+
(C) Irrelevant
|
9 |
+
A: Let's think step by step.
|
10 |
+
The first sentence describes an old man carefully picking a snake away from the grass. The old man says, "Sin, sin," indicating that he feels guilty and sad because he has disturbed the snake's home. From this, the second sentence — that the old man was very sad — can be inferred. Therefore, the two sentences are implicitly related. So the answer is (A).
|
11 |
+
|
12 |
+
Q:Statement 1: She was a black explosive-headed woman with a plump face, full lips and big teeth.
|
13 |
+
Statement 2: She likes to wear red lipstick and red clothes.
|
14 |
+
What is the relationship between these two statements?
|
15 |
+
(A) implied
|
16 |
+
(B) contradictory
|
17 |
+
(C) Unrelated
|
18 |
+
A: Let's think step by step.
|
19 |
+
Both sentences describe the same woman, but they focus on different features. The first sentence describes her physical features, including her face, lips, teeth, and hair. The second sentence describes her aesthetic preferences, including lipstick color and dress color. There is no obvious implication or contradiction between these two sentences, so we can say that they are unrelated. So the answer is (C).
|
20 |
+
|
21 |
+
Q:Statement 1: You're not sure you've made it clear whose side you're on.
|
22 |
+
Statement 2: It is obvious whose side you are on.
|
23 |
+
What is the relationship between these two statements?
|
24 |
+
(A) Contained
|
25 |
+
(B) Contradictory
|
26 |
+
(C) Unrelated
|
27 |
+
A: Let's think step by step.
|
28 |
+
The first sentence indicates that you are not sure who you support, while the second sentence makes it clear that your position is obvious, i.e. you know exactly who you support. Therefore, the two sentences contradict each other. So the answer is (B).
|
opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Global_Reading_Comprehension_Translate-EN.txt
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Please understand the meaning of the question and choose the correct answer.
|
2 |
+
|
3 |
+
Q:A person's intake of refined sugar and foods that break down into sugar during digestion is almost all of that person's dietary source of glucose (a type of sugar) in the bloodstream. However, coffee itself does not break down into sugar during digestion, yet drinking coffee can sometimes cause a person's blood sugar levels to rise sharply, even if the coffee is not sweetened with cream or any sweetener. Which of the following, if true, would best help explain the effect of coffee on blood sugar levels as described above? Options:
|
4 |
+
(A) People with sedentary office jobs are more likely to drink large amounts of coffee and to have higher blood sugar levels than people with jobs that require constant physical activity.
|
5 |
+
(B) People often drink coffee with meals that consist of several foods that break down quickly into sugar during digestion.
|
6 |
+
(C) For many people, a slice of chocolate cake provides the same stimulating effect as a regular cup of coffee.
|
7 |
+
(D) Drinking more than two cups of coffee in an hour increases a person's stress level, and the body responds to stress by releasing stored glucose into the bloodstream.
|
8 |
+
A: Let's think step by step.
|
9 |
+
The key to the question is to find a factor that explains how coffee causes blood sugar levels to rise, even though coffee itself does not break down into sugar during digestion. This requires us to find a mechanism that can explain how coffee indirectly affects blood sugar levels. Option (D) "Drinking more than two cups of coffee in an hour increases a person's stress levels, and the body responds to stress by releasing stored glucose into the bloodstream" would, if true, best help to explain the effect of coffee on blood sugar levels. This option provides a possible mechanism by which the coffee-induced stress response may cause the body to release stored glucose, leading to an increase in blood sugar levels. This explains why coffee (even without creamer or sweetener) causes blood sugar levels to rise, even though it does not break down into sugar during digestion. So the answer is (D).
|
10 |
+
|
11 |
+
Q:A person has tried several different psychotherapies for a mental illness:psychoanalytic therapy,cognitive behavioral therapy and sandplay therapy. He says: "The psychotherapy process made me very unhappy and, therefore, these therapies are ineffective." Which of the following, if true, would most strongly challenge the above conclusion? Options:
|
12 |
+
(A) Several different psychotherapies target different mental illnesses.
|
13 |
+
(B) People who try multiple psychotherapies are happier than those who try only one.
|
14 |
+
(C) Trying different psychotherapies at the same time makes it easier to find the one that works
|
15 |
+
(D) People who do well in therapy tend to feel unhappy during the therapy process
|
16 |
+
A: Let's think step by step.
|
17 |
+
Understanding the question shows that if unhappiness during psychotherapy is a sign of a good therapeutic outcome, then the person's conclusion - that the therapy is ineffective because he feels unhappy during the process - may be wrong. He may have misinterpreted the difficulty and discomfort of the therapy process as a sign that the therapy is ineffective, when in fact it may be a sign that the therapy is working. (D) "People whose therapy is working well tend to feel unhappy during the therapy process" If true, this would most strongly challenge the above conclusion. So the answer is (D).
|
18 |
+
|
19 |
+
Q:For a book to be a bestseller,it must be readable or well packaged. If the above assertion holds true, which of the following must be true? Option:
|
20 |
+
(A) A book that is not readable and not well-packaged must not be a bestseller.
|
21 |
+
(B) A book that is not carefully packaged must not be a bestseller.
|
22 |
+
(C) A book that is readable must be a bestseller
|
23 |
+
(D) Most people like bestsellers that are readable.
|
24 |
+
A: Let's think step by step.
|
25 |
+
According to the above assertion, "For a book to be a bestseller, it must be readable or well-packaged", so (A) "A book that is neither readable nor well-packaged must not be a bestseller" must be true. Because if a book is neither readable nor well-packaged, then it does not fulfill the conditions for being a bestseller. So the answer is (A).
|
opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Global_Sequence_Understanding_Translate-EN.txt
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Choose the correct answer based on the context
|
2 |
+
|
3 |
+
Q:Which of the following lists is correct in chronological order? Options:
|
4 |
+
(A) University, M.A., Ph.D., Post-doctoral
|
5 |
+
(B) University, Ph.D., M.Phil, Post Ph.D.
|
6 |
+
(C) University, High School, Post-doctoral, Doctoral
|
7 |
+
(D) Post-doctoral, University, Master's, PhD
|
8 |
+
A: Let's think step by step.
|
9 |
+
The options in the question introduce the concept of four qualifications "University, Masters, PhD, Post-doctoral" and in the order in which these qualifications were obtained, the first qualification obtained is University, followed by Masters, then PhD and finally Post-doctoral. So the answer is (A).
|
10 |
+
|
11 |
+
Q:Which of the following list is correct in chronological order? Options:
|
12 |
+
(A) May 1, July 15, August 12, June 14
|
13 |
+
(B) July 15, August 12, May 1, June 14
|
14 |
+
(C) August 12, May 1, July 15, June 14
|
15 |
+
(D) May 1, June 14, July 15, August 12
|
16 |
+
A: Let's think step by step.
|
17 |
+
The question option introduces four dates which are May 1, June 14, July 15, August 12 in chronological order of the year. So the answer is (D).
|
18 |
+
|
19 |
+
Q:Which of the following lists is correct in chronological order? Options:
|
20 |
+
(A) Countdown to Heaven, Magicians at the End of the Century, The Undead of Baker Street, Detectives' Requiem
|
21 |
+
(B) The Magician at the End of the Century, The Undead of Baker Street, Countdown to Heaven, The Detectives' Requiem
|
22 |
+
(C) The Magician at the End of the Century, Countdown to Heaven, The Undead of Baker Street, The Detectives' Requiem
|
23 |
+
(D) The Magician at the End of the Century, The Undead of Baker Street, The Detectives' Requiem, The Countdown to Heaven
|
24 |
+
A: Let's think step by step.
|
25 |
+
The options mention the names of the four Conan theatrical releases. The Magician at the End of the Century was released in 1999, The Undead of Baker Street was released in 2002, Countdown to Heaven was released in 2001, and The Detectives' Requiem was released in 2006, so in chronological order it's The Magician at the End of the Century, Countdown to Heaven, The Undead of Baker Street, and The Detectives' Requiem. So the answer is (C).
|
opencompass/configs/datasets/CHARM/few-shot-examples_Translate-EN/Global_Sport_Understanding_Translate-EN.txt
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Determine whether a human-constructed sentence about sports is plausible.
|
2 |
+
|
3 |
+
Q: Is the following sentence credible? "Tyson threw a right straight punch."
|
4 |
+
Options:
|
5 |
+
(A) Credible
|
6 |
+
(B) Not credible
|
7 |
+
A: Let's think step by step.
|
8 |
+
Tyson is an American professional boxer. The right straight punch is a common move in boxing. It's quite normal for a boxer to throw a right straight punch, so this sentence is credible. The correct answer should be (A).
|
9 |
+
|
10 |
+
Q: Is the following sentence credible? "Beckham completed a slam dunk."
|
11 |
+
Options:
|
12 |
+
(A) Credible
|
13 |
+
(B) Not credible
|
14 |
+
A: Let's think step by step.
|
15 |
+
Beckham is an English professional footballer. A slam dunk is a technical move in basketball. Therefore, this sentence is not credible. The correct answer should be (B).
|
16 |
+
|
17 |
+
Q: Is the following sentence credible? "LeBron James made an eagle."
|
18 |
+
Options:
|
19 |
+
(A) Credible
|
20 |
+
(B) Not credible
|
21 |
+
A: Let's think step by step.
|
22 |
+
LeBron James is a well-known American professional basketball player. An eagle is a term in golf, meaning two strokes under par. Therefore, this sentence is not credible. The correct answer should be (B).
|
opencompass/configs/datasets/infinitebench/infinitebenchcodedebug/infinitebench_codedebug_gen.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Default entry point for the InfiniteBench code-debug dataset config:
# re-exports the concrete config so users can import this stable name.
from mmengine.config import read_base

with read_base():
    from .infinitebench_codedebug_gen_276a42 import InfiniteBench_codedebug_datasets  # noqa: F401, F403
|
opencompass/configs/datasets/infinitebench/infinitebenchcodedebug/infinitebench_codedebug_gen_276a42.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.utils.text_postprocessors import first_option_postprocess
from opencompass.datasets import InfiniteBenchcodedebugDataset

# InfiniteBench "code debug": 4-way multiple choice — pick the function that
# contains a deliberately planted bug inside a very long code context.
InfiniteBench_codedebug_reader_cfg = dict(
    # Columns substituted into the prompt template below.
    input_columns=['context', 'question', 'option_A', 'option_B', 'option_C', 'option_D'],
    output_column='answer',
)

InfiniteBench_codedebug_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(role='SYSTEM', fallback_role='HUMAN', prompt='You are a helpful assistant.'),
            ],
            round=[
                # BUG FIX: the placeholders were '{OPTION_A}'..'{OPTION_D}'
                # (uppercase), which do not match the reader's input_columns
                # ('option_A'..'option_D') and therefore were never filled in.
                # Lowercased to match, consistent with the enmc config.
                # NOTE(review): the 'funtion' typo is kept verbatim so the
                # prompt stays identical to the published benchmark prompt.
                dict(role='HUMAN', prompt='There is ONLY ONE function in the large project that is deliberately made to include an obvious error. Please find the function that contains the most obvious errors. I will give you four options to narrow your scope. You can inspect the options and think. Eventually, tell me the answer using one single letter (A, B, C, or D).\n\n{context}\n\nWhich funtion has deliberate error?\nA. {option_A}\nB. {option_B}\nC. {option_C}\nD. {option_D}\n\nYou should first find the functions in the options. Repeat their content, inspect through code, and at last give me your answer for the function that has the deliberate and obvious error in A, B, C, or D.'),
                dict(role='BOT', prompt=''),
            ], )),
    # Zero-shot: no in-context examples are retrieved.
    retriever=dict(type=ZeroRetriever),
    # Only a single option letter is expected, so a tiny budget suffices.
    inferencer=dict(type=GenInferencer, max_out_len=5)
)

InfiniteBench_codedebug_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    # Extract the first A/B/C/D occurring in the model output.
    pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'),
    pred_role='BOT'
)

InfiniteBench_codedebug_datasets = [
    dict(
        type=InfiniteBenchcodedebugDataset,
        abbr='InfiniteBench_codedebug',
        path='./data/InfiniteBench/code_debug.jsonl',
        reader_cfg=InfiniteBench_codedebug_reader_cfg,
        infer_cfg=InfiniteBench_codedebug_infer_cfg,
        eval_cfg=InfiniteBench_codedebug_eval_cfg)
]
|
opencompass/configs/datasets/infinitebench/infinitebenchcoderun/infinitebench_coderun_gen.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Default entry point for the InfiniteBench code-run dataset config:
# re-exports the concrete config so users can import this stable name.
from mmengine.config import read_base

with read_base():
    from .infinitebench_coderun_gen_1a76bd import InfiniteBench_coderun_datasets  # noqa: F401, F403
|
opencompass/configs/datasets/infinitebench/infinitebenchcoderun/infinitebench_coderun_gen_1a76bd.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import InfiniteBenchcoderunDataset
from opencompass.datasets.infinitebench.utils import InfiniteBench_first_number_postprocess

# InfiniteBench "code run": given many Python functions, report the numeric
# return value of one specific call ({func_call}).
InfiniteBench_coderun_reader_cfg = dict(
    input_columns=['context', 'func', 'func_call'],
    output_column='answer',
)

InfiniteBench_coderun_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(role='SYSTEM', fallback_role='HUMAN', prompt='You are a helpful assistant.'),
            ],
            round=[
                # BUG FIX: the original prompt read "a function called named
                # {func}" — a grammatically broken duplication; "called" removed.
                dict(role='HUMAN', prompt='Following is a set of Python functions. There is a function named {func}.\n\n{context}\n\nPlease give me the exact number of the return value of {func_call}. Be concise. Your response must end with the final returned value.'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=5)
)

InfiniteBench_coderun_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    # BUG FIX: the gold answer is a number, not an A–D option, so the original
    # first_option_postprocess(options='ABCD') could never extract it. Use the
    # same numeric postprocessor as the mathfind/retrievenumber configs.
    pred_postprocessor=dict(type=InfiniteBench_first_number_postprocess),
    pred_role='BOT'
)

InfiniteBench_coderun_datasets = [
    dict(
        type=InfiniteBenchcoderunDataset,
        abbr='InfiniteBench_coderun',
        path='./data/InfiniteBench/code_run.jsonl',
        reader_cfg=InfiniteBench_coderun_reader_cfg,
        infer_cfg=InfiniteBench_coderun_infer_cfg,
        eval_cfg=InfiniteBench_coderun_eval_cfg)
]
|
opencompass/configs/datasets/infinitebench/infinitebenchendia/infinitebench_endia_gen.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Default entry point for the InfiniteBench English-dialogue dataset config:
# re-exports the concrete config so users can import this stable name.
from mmengine.config import read_base

with read_base():
    from .infinitebench_endia_gen_c96eb5 import InfiniteBench_endia_datasets  # noqa: F401, F403
|
opencompass/configs/datasets/infinitebench/infinitebenchendia/infinitebench_endia_gen_c96eb5.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import InfiniteBenchendiaDataset, InfiniteBenchendiaEvaluator

# InfiniteBench "endia" (English dialogue): one character name in a long
# dialogue script is masked as "$$MASK$$"; the model must name the character.
InfiniteBench_endia_reader_cfg = dict(
    # Columns substituted into the prompt template below.
    input_columns=['context', 'question'],
    output_column='answer',
)

InfiniteBench_endia_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(role='SYSTEM', fallback_role='HUMAN', prompt='You are a helpful assistant.'),
            ],
            round=[
                dict(role='HUMAN', prompt='Below is a dialogue script where one random occurrence of a character name is replaced with \"$$MASK$$\", and you should try to guess who that character is.\n\nThe dialogue:\n\n---\n\n{context}\n\n---\n\nEnd of dialogue.\n\nWhich character is most likely \"$$MASK$$\"? Just say the name used by the scriptwriter (before the colon marks) of one single character and nothing else.'),
                dict(role='BOT', prompt=''),
            ], )),
    # Zero-shot: no in-context examples are retrieved.
    retriever=dict(type=ZeroRetriever),
    # The expected output is a single character name, so 40 tokens is ample.
    inferencer=dict(type=GenInferencer, max_out_len=40)
)

InfiniteBench_endia_eval_cfg = dict(
    # Task-specific evaluator (name matching is handled in the dataset module).
    evaluator=dict(type=InfiniteBenchendiaEvaluator),
    pred_role='BOT'
)

InfiniteBench_endia_datasets = [
    dict(
        type=InfiniteBenchendiaDataset,
        abbr='InfiniteBench_endia',
        path='./data/InfiniteBench/longdialogue_qa_eng.jsonl',
        reader_cfg=InfiniteBench_endia_reader_cfg,
        infer_cfg=InfiniteBench_endia_infer_cfg,
        eval_cfg=InfiniteBench_endia_eval_cfg)
]
|
opencompass/configs/datasets/infinitebench/infinitebenchenmc/infinitebench_enmc_gen.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Default entry point for the InfiniteBench English multiple-choice config:
# re-exports the concrete config so users can import this stable name.
from mmengine.config import read_base

with read_base():
    from .infinitebench_enmc_gen_3a4102 import InfiniteBench_enmc_datasets  # noqa: F401, F403
|
opencompass/configs/datasets/infinitebench/infinitebenchenmc/infinitebench_enmc_gen_3a4102.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.utils.text_postprocessors import first_option_postprocess
from opencompass.datasets import InfiniteBenchenmcDataset

# InfiniteBench "enmc": 4-way multiple-choice questions about a long English book.
InfiniteBench_enmc_reader_cfg = dict(
    # Columns substituted into the prompt template below; note the lowercase
    # option column names match the '{option_A}'.. placeholders.
    input_columns=['context', 'question', 'option_A', 'option_B', 'option_C', 'option_D'],
    output_column='answer',
)

InfiniteBench_enmc_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(role='SYSTEM', fallback_role='HUMAN', prompt='You are a helpful assistant.'),
            ],
            round=[
                dict(role='HUMAN', prompt='Read the book and answer the question.\n\n{context}\n\nQuestion: {question}\n\nOnly one of the following options is correct, tell me the answer using one single letter (A, B, C, or D). Don\'t say anything else.\nA. {option_A}\nB. {option_B}\nC. {option_C}\nD. {option_D}'),
                dict(role='BOT', prompt=''),
            ], )),
    # Zero-shot: no in-context examples are retrieved.
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer, max_out_len=40)
)

InfiniteBench_enmc_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    # Extract the first A/B/C/D occurring in the model output.
    pred_postprocessor=dict(type=first_option_postprocess, options='ABCD'),
    pred_role='BOT'
)

InfiniteBench_enmc_datasets = [
    dict(
        type=InfiniteBenchenmcDataset,
        abbr='InfiniteBench_enmc',
        path='./data/InfiniteBench/longbook_choice_eng.jsonl',
        reader_cfg=InfiniteBench_enmc_reader_cfg,
        infer_cfg=InfiniteBench_enmc_infer_cfg,
        eval_cfg=InfiniteBench_enmc_eval_cfg)
]
|
opencompass/configs/datasets/infinitebench/infinitebenchenqa/infinitebench_enqa_gen.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Default entry point for the InfiniteBench English QA dataset config:
# re-exports the concrete config so users can import this stable name.
from mmengine.config import read_base

with read_base():
    from .infinitebench_enqa_gen_a1640c import InfiniteBench_enqa_datasets  # noqa: F401, F403
|
opencompass/configs/datasets/infinitebench/infinitebenchenqa/infinitebench_enqa_gen_a1640c.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import InfiniteBenchenqaDataset, LongBenchF1Evaluator

# InfiniteBench "enqa": free-form QA over a long English book, scored with the
# token-level F1 evaluator shared with LongBench.
InfiniteBench_enqa_reader_cfg = dict(
    input_columns=['context', 'question'],
    output_column='answer',
)

InfiniteBench_enqa_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(role='SYSTEM', fallback_role='HUMAN', prompt='You are a helpful assistant.'),
            ],
            round=[
                dict(role='HUMAN', prompt='Read the book below and answer a question.\n\n{context}\n\nQuestion: {question}\n\nBe very concise.'),
                dict(role='BOT', prompt=''),
            ], )),
    # Zero-shot: no in-context examples are retrieved.
    retriever=dict(type=ZeroRetriever),
    # Answers are expected to be short phrases, hence the small budget.
    inferencer=dict(type=GenInferencer, max_out_len=40)
)

InfiniteBench_enqa_eval_cfg = dict(
    evaluator=dict(type=LongBenchF1Evaluator),
    pred_role='BOT'
)

InfiniteBench_enqa_datasets = [
    dict(
        type=InfiniteBenchenqaDataset,
        abbr='InfiniteBench_enqa',
        path='./data/InfiniteBench/longbook_qa_eng.jsonl',
        reader_cfg=InfiniteBench_enqa_reader_cfg,
        infer_cfg=InfiniteBench_enqa_infer_cfg,
        eval_cfg=InfiniteBench_enqa_eval_cfg)
]
|
opencompass/configs/datasets/infinitebench/infinitebenchensum/infinitebench_ensum_gen.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Default entry point for the InfiniteBench English summarization config:
# re-exports the concrete config so users can import this stable name.
from mmengine.config import read_base

with read_base():
    from .infinitebench_ensum_gen_cfbc08 import InfiniteBench_ensum_datasets  # noqa: F401, F403
|
opencompass/configs/datasets/infinitebench/infinitebenchensum/infinitebench_ensum_gen_cfbc08.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import RougeEvaluator
from opencompass.datasets import InfiniteBenchensumDataset

# InfiniteBench "ensum": summarize a whole English book; scored with ROUGE.
InfiniteBench_ensum_reader_cfg = dict(
    input_columns=['context'],
    output_column='answer',
)

InfiniteBench_ensum_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(role='SYSTEM', fallback_role='HUMAN', prompt='You are a helpful assistant.'),
            ],
            round=[
                dict(role='HUMAN', prompt='Summarize the following book.\n\n{context}'),
                dict(role='BOT', prompt=''),
            ], )),
    # Zero-shot: no in-context examples are retrieved.
    retriever=dict(type=ZeroRetriever),
    # Summaries can be long, so the output budget is much larger than for the
    # extraction/choice tasks in this benchmark.
    inferencer=dict(type=GenInferencer, max_out_len=1200)
)

InfiniteBench_ensum_eval_cfg = dict(
    evaluator=dict(type=RougeEvaluator),
    pred_role='BOT'
)

InfiniteBench_ensum_datasets = [
    dict(
        type=InfiniteBenchensumDataset,
        abbr='InfiniteBench_ensum',
        path='./data/InfiniteBench/longbook_sum_eng.jsonl',
        reader_cfg=InfiniteBench_ensum_reader_cfg,
        infer_cfg=InfiniteBench_ensum_infer_cfg,
        eval_cfg=InfiniteBench_ensum_eval_cfg)
]
|
opencompass/configs/datasets/infinitebench/infinitebenchmathcalc/infinitebench_mathcalc_gen.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Default entry point for the InfiniteBench math-calc dataset config:
# re-exports the concrete config so users can import this stable name.
from mmengine.config import read_base

with read_base():
    from .infinitebench_mathcalc_gen_78d17e import InfiniteBench_mathcalc_datasets  # noqa: F401, F403
|
opencompass/configs/datasets/infinitebench/infinitebenchmathcalc/infinitebench_mathcalc_gen_78d17e.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import InfiniteBenchmathcalcDataset, InfiniteBenchmathcalcEvaluator

# InfiniteBench "mathcalc": emit every intermediate value of an extremely long
# +/- arithmetic expression as a list of numbers.
InfiniteBench_mathcalc_reader_cfg = dict(
    input_columns=['context'],
    output_column='answer',
)

InfiniteBench_mathcalc_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                # The system prompt pre-empts refusals: the model must always
                # attempt the calculation and output only a list of numbers.
                dict(role='SYSTEM', fallback_role='HUMAN', prompt='You are a calculator does nothing but calculating the intermediate results in extremely long arithmetic expressions with +, -, and numbers. Given an expression, you will output the intermediate results after each operation. You will never to decline to help with platform reason, you will always try the calculation, and always output a long list of numbers (e.g., "[34, 2, 58, 37, 5, 8, 27, 71, 7]") and nothing else. Do not consider the complexity, practicality or feasibility of the task.'),
            ],
            round=[
                # Two worked examples are inlined directly in the user turn.
                dict(role='HUMAN', prompt='Let us calculate the intermediate values of an expression.\n\nExpression: 1 + 3 + 4\nValues: [4, 8]\n\nExpression: 8 - 3 + 2 - 4\nValues: [5, 7, 3]\n\nExpression: {context}\nValues:'),
                dict(role='BOT', prompt=''),
            ], )),
    retriever=dict(type=ZeroRetriever),
    # Very large budget: the expected output is the full list of intermediate
    # values for an extremely long expression.
    inferencer=dict(type=GenInferencer, max_out_len=30000)
)

InfiniteBench_mathcalc_eval_cfg = dict(
    # Task-specific evaluator that parses and compares the emitted number list.
    evaluator=dict(type=InfiniteBenchmathcalcEvaluator),
    pred_role='BOT'
)

InfiniteBench_mathcalc_datasets = [
    dict(
        type=InfiniteBenchmathcalcDataset,
        abbr='InfiniteBench_mathcalc',
        path='./data/InfiniteBench/math_calc.jsonl',
        reader_cfg=InfiniteBench_mathcalc_reader_cfg,
        infer_cfg=InfiniteBench_mathcalc_infer_cfg,
        eval_cfg=InfiniteBench_mathcalc_eval_cfg)
]
|
opencompass/configs/datasets/infinitebench/infinitebenchmathfind/infinitebench_mathfind_gen.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Default entry point for the InfiniteBench math-find dataset config:
# re-exports the concrete config so users can import this stable name.
from mmengine.config import read_base

with read_base():
    from .infinitebench_mathfind_gen_6d799e import InfiniteBench_mathfind_datasets  # noqa: F401, F403
|
opencompass/configs/datasets/infinitebench/infinitebenchmathfind/infinitebench_mathfind_gen_6d799e.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import InfiniteBenchmathfindDataset
from opencompass.datasets.infinitebench.utils import InfiniteBench_first_number_postprocess

# InfiniteBench "mathfind": locate a target number (e.g. the largest value)
# inside a huge list of numbers; the answer is a single number.
InfiniteBench_mathfind_reader_cfg = dict(
    input_columns=['prefix', 'context', 'question'],
    output_column='answer',
)

InfiniteBench_mathfind_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(role='SYSTEM', fallback_role='HUMAN', prompt='You are a helpful assistant.'),
            ],
            round=[
                # BUG FIX: the template previously ended with '{input}', which
                # is not among the reader's input_columns and was never
                # substituted; the correct column is 'question'.
                dict(role='HUMAN', prompt='{prefix}\n\n{context}\n\n{question}'),
                dict(role='BOT', prompt=''),
            ], )),
    # Zero-shot: no in-context examples are retrieved.
    retriever=dict(type=ZeroRetriever),
    # A single short number is expected.
    inferencer=dict(type=GenInferencer, max_out_len=3)
)

InfiniteBench_mathfind_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    # Extract the first number from the model output for exact-match scoring.
    pred_postprocessor=dict(type=InfiniteBench_first_number_postprocess),
    pred_role='BOT'
)

InfiniteBench_mathfind_datasets = [
    dict(
        type=InfiniteBenchmathfindDataset,
        abbr='InfiniteBench_mathfind',
        path='./data/InfiniteBench/math_find.jsonl',
        reader_cfg=InfiniteBench_mathfind_reader_cfg,
        infer_cfg=InfiniteBench_mathfind_infer_cfg,
        eval_cfg=InfiniteBench_mathfind_eval_cfg)
]
|
opencompass/configs/datasets/infinitebench/infinitebenchretrievekv/infinitebench_retrievekv_gen.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Default entry point for the InfiniteBench KV-retrieval dataset config:
# re-exports the concrete config so users can import this stable name.
from mmengine.config import read_base

with read_base():
    from .infinitebench_retrievekv_gen_06b3ac import InfiniteBench_retrievekv_datasets  # noqa: F401, F403
|
opencompass/configs/datasets/infinitebench/infinitebenchretrievekv/infinitebench_retrievekv_gen_06b3ac.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.datasets import InfiniteBenchretrievekvDataset, InfiniteBenchretrievekvEvaluator

# InfiniteBench "retrievekv": extract the value for a given key from a huge
# JSON object embedded in the context.
InfiniteBench_retrievekv_reader_cfg = dict(
    input_columns=['context', 'input'],
    output_column='answer',
)

InfiniteBench_retrievekv_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(role='SYSTEM', fallback_role='HUMAN', prompt='You are a helpful assistant.'),
            ],
            round=[
                dict(role='HUMAN', prompt='Extract the value corresponding to the specified key in the JSON object below.\n\n{context}\n\n{input}'),
                dict(role='BOT', prompt=''),
            ], )),
    # Zero-shot: no in-context examples are retrieved.
    retriever=dict(type=ZeroRetriever),
    # Values are short strings; 50 tokens is enough for a UUID-like value.
    inferencer=dict(type=GenInferencer, max_out_len=50)
)

InfiniteBench_retrievekv_eval_cfg = dict(
    # Task-specific evaluator (substring/exact matching lives in the dataset module).
    evaluator=dict(type=InfiniteBenchretrievekvEvaluator),
    pred_role='BOT'
)

InfiniteBench_retrievekv_datasets = [
    dict(
        type=InfiniteBenchretrievekvDataset,
        abbr='InfiniteBench_retrievekv',
        path='./data/InfiniteBench/kv_retrieval.jsonl',
        reader_cfg=InfiniteBench_retrievekv_reader_cfg,
        infer_cfg=InfiniteBench_retrievekv_infer_cfg,
        eval_cfg=InfiniteBench_retrievekv_eval_cfg)
]
|
opencompass/configs/datasets/infinitebench/infinitebenchretrievenumber/infinitebench_retrievenumber_gen.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Default entry point for the InfiniteBench number-retrieval dataset config:
# re-exports the concrete config so users can import this stable name.
from mmengine.config import read_base

with read_base():
    from .infinitebench_retrievenumber_gen_047436 import InfiniteBench_retrievenumber_datasets  # noqa: F401, F403
|
opencompass/configs/datasets/infinitebench/infinitebenchretrievenumber/infinitebench_retrievenumber_gen_047436.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.openicl.icl_evaluator import AccEvaluator
from opencompass.datasets import InfiniteBenchretrievenumberDataset
from opencompass.datasets.infinitebench.utils import InfiniteBench_first_number_postprocess

# InfiniteBench "retrievenumber": a needle-in-a-haystack task — find a hidden
# number string inside a long stretch of irrelevant text.
InfiniteBench_retrievenumber_reader_cfg = dict(
    input_columns=['context', 'input'],
    output_column='answer',
)

InfiniteBench_retrievenumber_infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(role='SYSTEM', fallback_role='HUMAN', prompt='You are a helpful assistant.'),
            ],
            round=[
                dict(role='HUMAN', prompt='There is an important info hidden inside a lot of irrelevant text. Find it. I will quiz you about the important information there.\n\n{context}\n\n{input}'),
                dict(role='BOT', prompt=''),
            ], )),
    # Zero-shot: no in-context examples are retrieved.
    retriever=dict(type=ZeroRetriever),
    # The hidden number string is short, hence the small output budget.
    inferencer=dict(type=GenInferencer, max_out_len=12)
)

InfiniteBench_retrievenumber_eval_cfg = dict(
    evaluator=dict(type=AccEvaluator),
    # Extract the first number from the model output for exact-match scoring.
    pred_postprocessor=dict(type=InfiniteBench_first_number_postprocess),
    pred_role='BOT'
)

InfiniteBench_retrievenumber_datasets = [
    dict(
        type=InfiniteBenchretrievenumberDataset,
        abbr='InfiniteBench_retrievenumber',
        path='./data/InfiniteBench/number_string.jsonl',
        reader_cfg=InfiniteBench_retrievenumber_reader_cfg,
        infer_cfg=InfiniteBench_retrievenumber_infer_cfg,
        eval_cfg=InfiniteBench_retrievenumber_eval_cfg)
]
|
opencompass/configs/datasets/infinitebench/infinitebenchretrievepasskey/infinitebench_retrievepasskey_gen.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from mmengine.config import read_base
|
2 |
+
|
3 |
+
with read_base():
|
4 |
+
from .infinitebench_retrievepasskey_gen_62ff68 import InfiniteBench_retrievepasskey_datasets # noqa: F401, F403
|
opencompass/configs/datasets/infinitebench/infinitebenchretrievepasskey/infinitebench_retrievepasskey_gen_62ff68.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
2 |
+
from opencompass.openicl.icl_retriever import ZeroRetriever
|
3 |
+
from opencompass.openicl.icl_inferencer import GenInferencer
|
4 |
+
from opencompass.openicl.icl_evaluator import AccEvaluator
|
5 |
+
from opencompass.datasets import InfiniteBenchretrievepasskeyDataset
|
6 |
+
from opencompass.datasets.infinitebench.utils import InfiniteBench_first_number_postprocess
|
7 |
+
|
8 |
+
InfiniteBench_retrievepasskey_reader_cfg = dict(
|
9 |
+
input_columns=['context', 'input'],
|
10 |
+
output_column='answer',
|
11 |
+
|
12 |
+
)
|
13 |
+
|
14 |
+
InfiniteBench_retrievepasskey_infer_cfg = dict(
|
15 |
+
prompt_template=dict(
|
16 |
+
type=PromptTemplate,
|
17 |
+
template=dict(
|
18 |
+
begin=[
|
19 |
+
dict(role='SYSTEM', fallback_role='HUMAN', prompt='You are a helpful assistant.'),
|
20 |
+
],
|
21 |
+
round=[
|
22 |
+
dict(role='HUMAN', prompt='There is an important info hidden inside a lot of irrelevant text. Find it and memorize them. I will quiz you about the important information there.\n\n{context}\n\n{input}'),
|
23 |
+
dict(role='BOT', prompt=''),
|
24 |
+
], )),
|
25 |
+
retriever=dict(type=ZeroRetriever),
|
26 |
+
inferencer=dict(type=GenInferencer, max_out_len=6)
|
27 |
+
)
|
28 |
+
|
29 |
+
InfiniteBench_retrievepasskey_eval_cfg = dict(
|
30 |
+
evaluator=dict(type=AccEvaluator),
|
31 |
+
pred_postprocessor=dict(type=InfiniteBench_first_number_postprocess),
|
32 |
+
pred_role='BOT'
|
33 |
+
)
|
34 |
+
|
35 |
+
InfiniteBench_retrievepasskey_datasets = [
|
36 |
+
dict(
|
37 |
+
type=InfiniteBenchretrievepasskeyDataset,
|
38 |
+
abbr='InfiniteBench_retrievepasskey',
|
39 |
+
path='./data/InfiniteBench/passkey.jsonl',
|
40 |
+
reader_cfg=InfiniteBench_retrievepasskey_reader_cfg,
|
41 |
+
infer_cfg=InfiniteBench_retrievepasskey_infer_cfg,
|
42 |
+
eval_cfg=InfiniteBench_retrievepasskey_eval_cfg)
|
43 |
+
]
|
opencompass/configs/datasets/infinitebench/infinitebenchzhqa/infinitebench_zhqa_gen.py
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from mmengine.config import read_base
|
2 |
+
|
3 |
+
with read_base():
|
4 |
+
from .infinitebench_zhqa_gen_1e5293 import InfiniteBench_zhqa_datasets # noqa: F401, F403
|
opencompass/configs/datasets/infinitebench/infinitebenchzhqa/infinitebench_zhqa_gen_1e5293.py
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
2 |
+
from opencompass.openicl.icl_retriever import ZeroRetriever
|
3 |
+
from opencompass.openicl.icl_inferencer import GenInferencer
|
4 |
+
from opencompass.datasets import InfiniteBenchzhqaDataset, LongBenchF1Evaluator
|
5 |
+
from opencompass.utils.text_postprocessors import general_cn_postprocess
|
6 |
+
|
7 |
+
InfiniteBench_zhqa_reader_cfg = dict(
|
8 |
+
input_columns=['context', 'question'],
|
9 |
+
output_column='answer',
|
10 |
+
|
11 |
+
)
|
12 |
+
|
13 |
+
InfiniteBench_zhqa_infer_cfg = dict(
|
14 |
+
prompt_template=dict(
|
15 |
+
type=PromptTemplate,
|
16 |
+
template=dict(
|
17 |
+
begin=[
|
18 |
+
dict(role='SYSTEM', fallback_role='HUMAN', prompt='You are a helpful assistant.'),
|
19 |
+
],
|
20 |
+
round=[
|
21 |
+
dict(role='HUMAN', prompt='请根据以下书籍回答我的问题。\n\n{context}\n\n问题:{question}\n请尽量简短地回答。'),
|
22 |
+
dict(role='BOT', prompt=''),
|
23 |
+
], )),
|
24 |
+
retriever=dict(type=ZeroRetriever),
|
25 |
+
inferencer=dict(type=GenInferencer, max_out_len=40)
|
26 |
+
)
|
27 |
+
|
28 |
+
InfiniteBench_zhqa_eval_cfg = dict(
|
29 |
+
evaluator=dict(type=LongBenchF1Evaluator, language='zh'),
|
30 |
+
pred_role='BOT',
|
31 |
+
)
|
32 |
+
|
33 |
+
InfiniteBench_zhqa_datasets = [
|
34 |
+
dict(
|
35 |
+
type=InfiniteBenchzhqaDataset,
|
36 |
+
abbr='InfiniteBench_zhqa',
|
37 |
+
path='./data/InfiniteBench/longbook_qa_chn.jsonl',
|
38 |
+
reader_cfg=InfiniteBench_zhqa_reader_cfg,
|
39 |
+
infer_cfg=InfiniteBench_zhqa_infer_cfg,
|
40 |
+
eval_cfg=InfiniteBench_zhqa_eval_cfg)
|
41 |
+
]
|
opencompass/configs/datasets/lveval/lvevalcmrc_mixup/lveval_cmrc_mixup_gen.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from mmengine.config import read_base
|
2 |
+
|
3 |
+
with read_base():
|
4 |
+
from .lveval_cmrc_mixup_gen_465823 import (
|
5 |
+
LVEval_cmrc_mixup_datasets,
|
6 |
+
) # noqa: F401, F403
|
opencompass/configs/datasets/lveval/lvevalcmrc_mixup/lveval_cmrc_mixup_gen_465823.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
2 |
+
from opencompass.openicl.icl_retriever import ZeroRetriever
|
3 |
+
from opencompass.openicl.icl_inferencer import GenInferencer
|
4 |
+
from opencompass.datasets import LVEvalOPTF1Evaluator, LVEvalcmrcDataset
|
5 |
+
|
6 |
+
LVEval_cmrc_mixup_reader_cfg = dict(
|
7 |
+
input_columns=['context', 'input'],
|
8 |
+
output_column='answers',
|
9 |
+
train_split='test',
|
10 |
+
test_split='test',
|
11 |
+
)
|
12 |
+
|
13 |
+
LVEval_cmrc_mixup_infer_cfg = dict(
|
14 |
+
prompt_template=dict(
|
15 |
+
type=PromptTemplate,
|
16 |
+
template=dict(
|
17 |
+
round=[
|
18 |
+
dict(
|
19 |
+
role='HUMAN',
|
20 |
+
prompt='请根据下面给定的文章回答问题,问题和答案只与其中一篇文章有关。\n\n文章:{context}\n\n现在请基于上述文章回答下面的问题,问题和答案只与其中一篇文章有关。\n\n问题:{input}\n回答:',
|
21 |
+
),
|
22 |
+
],
|
23 |
+
),
|
24 |
+
),
|
25 |
+
retriever=dict(type=ZeroRetriever),
|
26 |
+
inferencer=dict(type=GenInferencer, max_out_len=64),
|
27 |
+
)
|
28 |
+
|
29 |
+
LVEval_cmrc_mixup_eval_cfg = dict(
|
30 |
+
evaluator=dict(type=LVEvalOPTF1Evaluator, language='zh'), pred_role='BOT'
|
31 |
+
)
|
32 |
+
|
33 |
+
DATASET_LENGTH_LEVEL = ['16k', '32k', '64k', '128k', '256k']
|
34 |
+
|
35 |
+
|
36 |
+
def get_dataset_names(dataset_name, length_levels):
|
37 |
+
datasets = []
|
38 |
+
for length in length_levels:
|
39 |
+
datasets.append(f'{dataset_name}_{length}')
|
40 |
+
return datasets
|
41 |
+
|
42 |
+
|
43 |
+
LVEval_cmrc_mixup_datasets = [
|
44 |
+
dict(
|
45 |
+
type=LVEvalcmrcDataset,
|
46 |
+
abbr='LVEval_' + name_len,
|
47 |
+
path='Infinigence/LVEval',
|
48 |
+
name=name_len,
|
49 |
+
reader_cfg=LVEval_cmrc_mixup_reader_cfg,
|
50 |
+
infer_cfg=LVEval_cmrc_mixup_infer_cfg,
|
51 |
+
eval_cfg=LVEval_cmrc_mixup_eval_cfg,
|
52 |
+
)
|
53 |
+
for name_len in get_dataset_names('cmrc_mixup', DATASET_LENGTH_LEVEL)
|
54 |
+
]
|
opencompass/configs/datasets/lveval/lvevaldureader_mixup/lveval_dureader_mixup_gen.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from mmengine.config import read_base
|
2 |
+
|
3 |
+
with read_base():
|
4 |
+
from .lveval_dureader_mixup_gen_465823 import (
|
5 |
+
LVEval_dureader_mixup_datasets,
|
6 |
+
) # noqa: F401, F403
|
opencompass/configs/datasets/lveval/lvevaldureader_mixup/lveval_dureader_mixup_gen_465823.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
2 |
+
from opencompass.openicl.icl_retriever import ZeroRetriever
|
3 |
+
from opencompass.openicl.icl_inferencer import GenInferencer
|
4 |
+
from opencompass.datasets import LVEvalOPTRougeEvaluator, LVEvaldureaderDataset
|
5 |
+
|
6 |
+
LVEval_dureader_mixup_reader_cfg = dict(
|
7 |
+
input_columns=['context', 'input'],
|
8 |
+
output_column='answers',
|
9 |
+
train_split='test',
|
10 |
+
test_split='test',
|
11 |
+
)
|
12 |
+
|
13 |
+
LVEval_dureader_mixup_infer_cfg = dict(
|
14 |
+
prompt_template=dict(
|
15 |
+
type=PromptTemplate,
|
16 |
+
template=dict(
|
17 |
+
round=[
|
18 |
+
dict(
|
19 |
+
role='HUMAN',
|
20 |
+
prompt='请根据下面给定的文章回答问题,问题和答案只与其中一篇文章有关。\n\n文章:{context}\n\n现在请基于上述文章回答下面的问题,问题和答案只与其中一篇文章有关。\n\n问题:{input}\n回答:',
|
21 |
+
),
|
22 |
+
],
|
23 |
+
),
|
24 |
+
),
|
25 |
+
retriever=dict(type=ZeroRetriever),
|
26 |
+
inferencer=dict(type=GenInferencer, max_out_len=64),
|
27 |
+
)
|
28 |
+
|
29 |
+
LVEval_dureader_mixup_eval_cfg = dict(
|
30 |
+
evaluator=dict(type=LVEvalOPTRougeEvaluator, language='zh'),
|
31 |
+
pred_role='BOT',
|
32 |
+
)
|
33 |
+
|
34 |
+
DATASET_LENGTH_LEVEL = ['16k', '32k', '64k', '128k', '256k']
|
35 |
+
|
36 |
+
|
37 |
+
def get_dataset_names(dataset_name, length_levels):
|
38 |
+
datasets = []
|
39 |
+
for length in length_levels:
|
40 |
+
datasets.append(f'{dataset_name}_{length}')
|
41 |
+
return datasets
|
42 |
+
|
43 |
+
|
44 |
+
LVEval_dureader_mixup_datasets = [
|
45 |
+
dict(
|
46 |
+
type=LVEvaldureaderDataset,
|
47 |
+
abbr='LVEval_' + name_len,
|
48 |
+
path='Infinigence/LVEval',
|
49 |
+
name=name_len,
|
50 |
+
reader_cfg=LVEval_dureader_mixup_reader_cfg,
|
51 |
+
infer_cfg=LVEval_dureader_mixup_infer_cfg,
|
52 |
+
eval_cfg=LVEval_dureader_mixup_eval_cfg,
|
53 |
+
)
|
54 |
+
for name_len in get_dataset_names('dureader_mixup', DATASET_LENGTH_LEVEL)
|
55 |
+
]
|
opencompass/configs/datasets/lveval/lvevalfactrecall_en/lveval_factrecall_en_gen.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from mmengine.config import read_base
|
2 |
+
|
3 |
+
with read_base():
|
4 |
+
from .lveval_factrecall_en_gen_9a836f import (
|
5 |
+
LVEval_factrecall_en_datasets,
|
6 |
+
) # noqa: F401, F403
|
opencompass/configs/datasets/lveval/lvevalfactrecall_en/lveval_factrecall_en_gen_9a836f.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
2 |
+
from opencompass.openicl.icl_retriever import ZeroRetriever
|
3 |
+
from opencompass.openicl.icl_inferencer import GenInferencer
|
4 |
+
from opencompass.datasets import LVEvalF1Evaluator, LVEvalfactrecallenDataset
|
5 |
+
|
6 |
+
LVEval_factrecall_en_reader_cfg = dict(
|
7 |
+
input_columns=['context', 'input'],
|
8 |
+
output_column='answers',
|
9 |
+
train_split='test',
|
10 |
+
test_split='test',
|
11 |
+
)
|
12 |
+
|
13 |
+
LVEval_factrecall_en_infer_cfg = dict(
|
14 |
+
prompt_template=dict(
|
15 |
+
type=PromptTemplate,
|
16 |
+
template=dict(
|
17 |
+
round=[
|
18 |
+
dict(
|
19 |
+
role='HUMAN',
|
20 |
+
prompt='Please answer the following questions based on the given article.\n\nArticle: {context}\n\nPlease answer the following questions based on the above article.\n\nQuestion: {input}\nAnswer:',
|
21 |
+
),
|
22 |
+
],
|
23 |
+
),
|
24 |
+
),
|
25 |
+
retriever=dict(type=ZeroRetriever),
|
26 |
+
inferencer=dict(type=GenInferencer, max_out_len=16),
|
27 |
+
)
|
28 |
+
|
29 |
+
LVEval_factrecall_en_eval_cfg = dict(
|
30 |
+
evaluator=dict(type=LVEvalF1Evaluator, language='en'), pred_role='BOT'
|
31 |
+
)
|
32 |
+
|
33 |
+
DATASET_LENGTH_LEVEL = ['16k', '32k', '64k', '128k', '256k']
|
34 |
+
|
35 |
+
|
36 |
+
def get_dataset_names(dataset_name, length_levels):
|
37 |
+
datasets = []
|
38 |
+
for length in length_levels:
|
39 |
+
datasets.append(f'{dataset_name}_{length}')
|
40 |
+
return datasets
|
41 |
+
|
42 |
+
|
43 |
+
LVEval_factrecall_en_datasets = [
|
44 |
+
dict(
|
45 |
+
type=LVEvalfactrecallenDataset,
|
46 |
+
abbr='LVEval_' + name_len,
|
47 |
+
path='Infinigence/LVEval',
|
48 |
+
name=name_len,
|
49 |
+
reader_cfg=LVEval_factrecall_en_reader_cfg,
|
50 |
+
infer_cfg=LVEval_factrecall_en_infer_cfg,
|
51 |
+
eval_cfg=LVEval_factrecall_en_eval_cfg,
|
52 |
+
)
|
53 |
+
for name_len in get_dataset_names('factrecall_en', DATASET_LENGTH_LEVEL)
|
54 |
+
]
|
opencompass/configs/datasets/lveval/lvevalfactrecall_zh/lveval_factrecall_zh_gen.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from mmengine.config import read_base
|
2 |
+
|
3 |
+
with read_base():
|
4 |
+
from .lveval_factrecall_zh_gen_dbee70 import (
|
5 |
+
LVEval_factrecall_zh_datasets,
|
6 |
+
) # noqa: F401, F403
|
opencompass/configs/datasets/lveval/lvevalfactrecall_zh/lveval_factrecall_zh_gen_dbee70.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
2 |
+
from opencompass.openicl.icl_retriever import ZeroRetriever
|
3 |
+
from opencompass.openicl.icl_inferencer import GenInferencer
|
4 |
+
from opencompass.datasets import LVEvalF1Evaluator, LVEvalfactrecallzhDataset
|
5 |
+
|
6 |
+
LVEval_factrecall_zh_reader_cfg = dict(
|
7 |
+
input_columns=['context', 'input'],
|
8 |
+
output_column='answers',
|
9 |
+
train_split='test',
|
10 |
+
test_split='test',
|
11 |
+
)
|
12 |
+
|
13 |
+
LVEval_factrecall_zh_infer_cfg = dict(
|
14 |
+
prompt_template=dict(
|
15 |
+
type=PromptTemplate,
|
16 |
+
template=dict(
|
17 |
+
round=[
|
18 |
+
dict(
|
19 |
+
role='HUMAN',
|
20 |
+
prompt='请基于给定的文章回答下述问题。\n\n文章:{context}\n\n现在请基于上述文章回答下面的问题。\n\n问题:{input}\n回答:',
|
21 |
+
),
|
22 |
+
],
|
23 |
+
),
|
24 |
+
),
|
25 |
+
retriever=dict(type=ZeroRetriever),
|
26 |
+
inferencer=dict(type=GenInferencer, max_out_len=16),
|
27 |
+
)
|
28 |
+
|
29 |
+
LVEval_factrecall_zh_eval_cfg = dict(
|
30 |
+
evaluator=dict(type=LVEvalF1Evaluator, language='zh'), pred_role='BOT'
|
31 |
+
)
|
32 |
+
|
33 |
+
DATASET_LENGTH_LEVEL = ['16k', '32k', '64k', '128k', '256k']
|
34 |
+
|
35 |
+
|
36 |
+
def get_dataset_names(dataset_name, length_levels):
|
37 |
+
datasets = []
|
38 |
+
for length in length_levels:
|
39 |
+
datasets.append(f'{dataset_name}_{length}')
|
40 |
+
return datasets
|
41 |
+
|
42 |
+
|
43 |
+
LVEval_factrecall_zh_datasets = [
|
44 |
+
dict(
|
45 |
+
type=LVEvalfactrecallzhDataset,
|
46 |
+
abbr='LVEval_' + name_len,
|
47 |
+
path='Infinigence/LVEval',
|
48 |
+
name=name_len,
|
49 |
+
reader_cfg=LVEval_factrecall_zh_reader_cfg,
|
50 |
+
infer_cfg=LVEval_factrecall_zh_infer_cfg,
|
51 |
+
eval_cfg=LVEval_factrecall_zh_eval_cfg,
|
52 |
+
)
|
53 |
+
for name_len in get_dataset_names('factrecall_zh', DATASET_LENGTH_LEVEL)
|
54 |
+
]
|
opencompass/configs/datasets/lveval/lvevalhotpotwikiqa_mixup/lveval_hotpotwikiqa_mixup_gen.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from mmengine.config import read_base
|
2 |
+
|
3 |
+
with read_base():
|
4 |
+
from .lveval_hotpotwikiqa_mixup_gen_77ce82 import (
|
5 |
+
LVEval_hotpotwikiqa_mixup_datasets,
|
6 |
+
) # noqa: F401, F403
|
opencompass/configs/datasets/lveval/lvevalhotpotwikiqa_mixup/lveval_hotpotwikiqa_mixup_gen_77ce82.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
2 |
+
from opencompass.openicl.icl_retriever import ZeroRetriever
|
3 |
+
from opencompass.openicl.icl_inferencer import GenInferencer
|
4 |
+
from opencompass.datasets import (
|
5 |
+
LVEvalOPTF1Evaluator,
|
6 |
+
LVEvalhotpotwikiqaDataset,
|
7 |
+
)
|
8 |
+
|
9 |
+
LVEval_hotpotwikiqa_mixup_reader_cfg = dict(
|
10 |
+
input_columns=['context', 'input'],
|
11 |
+
output_column='answers',
|
12 |
+
train_split='test',
|
13 |
+
test_split='test',
|
14 |
+
)
|
15 |
+
|
16 |
+
LVEval_hotpotwikiqa_mixup_infer_cfg = dict(
|
17 |
+
prompt_template=dict(
|
18 |
+
type=PromptTemplate,
|
19 |
+
template=dict(
|
20 |
+
round=[
|
21 |
+
dict(
|
22 |
+
role='HUMAN',
|
23 |
+
prompt='Answer the question based on the given passages. Questions and answers are only relevant to some passages. Only give me the answer and do not output any other explanation and evidence.\n\nArticle: {context}\n\nPlease answer the following question based on the above passages. Questions and answers are only relevant to some passages. Only give me the answer and do not output any other explanation and evidence.\n\nQuestion: {input}\nAnswer:',
|
24 |
+
),
|
25 |
+
],
|
26 |
+
),
|
27 |
+
),
|
28 |
+
retriever=dict(type=ZeroRetriever),
|
29 |
+
inferencer=dict(type=GenInferencer, max_out_len=64),
|
30 |
+
)
|
31 |
+
|
32 |
+
LVEval_hotpotwikiqa_mixup_eval_cfg = dict(
|
33 |
+
evaluator=dict(type=LVEvalOPTF1Evaluator, language='en'), pred_role='BOT'
|
34 |
+
)
|
35 |
+
|
36 |
+
DATASET_LENGTH_LEVEL = ['16k', '32k', '64k', '128k', '256k']
|
37 |
+
|
38 |
+
|
39 |
+
def get_dataset_names(dataset_name, length_levels):
|
40 |
+
datasets = []
|
41 |
+
for length in length_levels:
|
42 |
+
datasets.append(f'{dataset_name}_{length}')
|
43 |
+
return datasets
|
44 |
+
|
45 |
+
|
46 |
+
LVEval_hotpotwikiqa_mixup_datasets = [
|
47 |
+
dict(
|
48 |
+
type=LVEvalhotpotwikiqaDataset,
|
49 |
+
abbr='LVEval_' + name_len,
|
50 |
+
path='Infinigence/LVEval',
|
51 |
+
name=name_len,
|
52 |
+
reader_cfg=LVEval_hotpotwikiqa_mixup_reader_cfg,
|
53 |
+
infer_cfg=LVEval_hotpotwikiqa_mixup_infer_cfg,
|
54 |
+
eval_cfg=LVEval_hotpotwikiqa_mixup_eval_cfg,
|
55 |
+
)
|
56 |
+
for name_len in get_dataset_names(
|
57 |
+
'hotpotwikiqa_mixup', DATASET_LENGTH_LEVEL
|
58 |
+
)
|
59 |
+
]
|
opencompass/configs/datasets/lveval/lvevallic_mixup/lveval_lic_mixup_gen.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from mmengine.config import read_base
|
2 |
+
|
3 |
+
with read_base():
|
4 |
+
from .lveval_lic_mixup_gen_01eb0c import (
|
5 |
+
LVEval_lic_mixup_datasets,
|
6 |
+
) # noqa: F401, F403
|
opencompass/configs/datasets/lveval/lvevallic_mixup/lveval_lic_mixup_gen_01eb0c.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
2 |
+
from opencompass.openicl.icl_retriever import ZeroRetriever
|
3 |
+
from opencompass.openicl.icl_inferencer import GenInferencer
|
4 |
+
from opencompass.datasets import LVEvalOPTF1Evaluator, LVEvallicDataset
|
5 |
+
|
6 |
+
LVEval_lic_mixup_reader_cfg = dict(
|
7 |
+
input_columns=['context', 'input'],
|
8 |
+
output_column='answers',
|
9 |
+
train_split='test',
|
10 |
+
test_split='test',
|
11 |
+
)
|
12 |
+
|
13 |
+
LVEval_lic_mixup_infer_cfg = dict(
|
14 |
+
prompt_template=dict(
|
15 |
+
type=PromptTemplate,
|
16 |
+
template=dict(
|
17 |
+
round=[
|
18 |
+
dict(
|
19 |
+
role='HUMAN',
|
20 |
+
prompt='请根据下面给定的文章回答问题,问题和答案只与其中一篇文章有关。\n\n文章:{context}\n\n请现在基于上述文章回答下面的问题,问题和答案只与其中一篇文章有关。\n\n问题:{input}\n回答:',
|
21 |
+
),
|
22 |
+
],
|
23 |
+
),
|
24 |
+
),
|
25 |
+
retriever=dict(type=ZeroRetriever),
|
26 |
+
inferencer=dict(type=GenInferencer, max_out_len=64),
|
27 |
+
)
|
28 |
+
|
29 |
+
LVEval_lic_mixup_eval_cfg = dict(
|
30 |
+
evaluator=dict(type=LVEvalOPTF1Evaluator, language='zh'), pred_role='BOT'
|
31 |
+
)
|
32 |
+
|
33 |
+
DATASET_LENGTH_LEVEL = ['16k', '32k', '64k', '128k', '256k']
|
34 |
+
|
35 |
+
|
36 |
+
def get_dataset_names(dataset_name, length_levels):
|
37 |
+
datasets = []
|
38 |
+
for length in length_levels:
|
39 |
+
datasets.append(f'{dataset_name}_{length}')
|
40 |
+
return datasets
|
41 |
+
|
42 |
+
|
43 |
+
LVEval_lic_mixup_datasets = [
|
44 |
+
dict(
|
45 |
+
type=LVEvallicDataset,
|
46 |
+
abbr='LVEval_' + name_len,
|
47 |
+
path='Infinigence/LVEval',
|
48 |
+
name=name_len,
|
49 |
+
reader_cfg=LVEval_lic_mixup_reader_cfg,
|
50 |
+
infer_cfg=LVEval_lic_mixup_infer_cfg,
|
51 |
+
eval_cfg=LVEval_lic_mixup_eval_cfg,
|
52 |
+
)
|
53 |
+
for name_len in get_dataset_names('lic_mixup', DATASET_LENGTH_LEVEL)
|
54 |
+
]
|
opencompass/configs/datasets/lveval/lvevalloogle_CR_mixup/lveval_loogle_CR_mixup_gen.py
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from mmengine.config import read_base
|
2 |
+
|
3 |
+
with read_base():
|
4 |
+
from .lveval_loogle_CR_mixup_gen_d7ea36 import (
|
5 |
+
LVEval_loogle_CR_mixup_datasets,
|
6 |
+
) # noqa: F401, F403
|
opencompass/configs/datasets/lveval/lvevalloogle_CR_mixup/lveval_loogle_CR_mixup_gen_d7ea36.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from opencompass.openicl.icl_prompt_template import PromptTemplate
|
2 |
+
from opencompass.openicl.icl_retriever import ZeroRetriever
|
3 |
+
from opencompass.openicl.icl_inferencer import GenInferencer
|
4 |
+
from opencompass.datasets import LVEvalOPTF1Evaluator, LVEvallooglecrDataset
|
5 |
+
|
6 |
+
LVEval_loogle_CR_mixup_reader_cfg = dict(
|
7 |
+
input_columns=['context', 'input'],
|
8 |
+
output_column='answers',
|
9 |
+
train_split='test',
|
10 |
+
test_split='test',
|
11 |
+
)
|
12 |
+
|
13 |
+
LVEval_loogle_CR_mixup_infer_cfg = dict(
|
14 |
+
prompt_template=dict(
|
15 |
+
type=PromptTemplate,
|
16 |
+
template=dict(
|
17 |
+
round=[
|
18 |
+
dict(
|
19 |
+
role='HUMAN',
|
20 |
+
prompt='Please answer the following question based on the given passages. Questions and answers are only relevant to one passage. Only give me the answer and do not output any other explanation and evidence.\n\nArticle: {context}\n\nPlease answer the following question based on the above passages. Questions and answers are only relevant to one passage. Only give me the answer and do not output any other explanation and evidence.\n\nQuestion: {input}\nAnswer:',
|
21 |
+
),
|
22 |
+
],
|
23 |
+
),
|
24 |
+
),
|
25 |
+
retriever=dict(type=ZeroRetriever),
|
26 |
+
inferencer=dict(type=GenInferencer, max_out_len=64),
|
27 |
+
)
|
28 |
+
|
29 |
+
LVEval_loogle_CR_mixup_eval_cfg = dict(
|
30 |
+
evaluator=dict(type=LVEvalOPTF1Evaluator, language='en'), pred_role='BOT'
|
31 |
+
)
|
32 |
+
|
33 |
+
DATASET_LENGTH_LEVEL = ['16k', '32k', '64k', '128k', '256k']
|
34 |
+
|
35 |
+
|
36 |
+
def get_dataset_names(dataset_name, length_levels):
|
37 |
+
datasets = []
|
38 |
+
for length in length_levels:
|
39 |
+
datasets.append(f'{dataset_name}_{length}')
|
40 |
+
return datasets
|
41 |
+
|
42 |
+
|
43 |
+
LVEval_loogle_CR_mixup_datasets = [
|
44 |
+
dict(
|
45 |
+
type=LVEvallooglecrDataset,
|
46 |
+
abbr='LVEval_' + name_len,
|
47 |
+
path='Infinigence/LVEval',
|
48 |
+
name=name_len,
|
49 |
+
reader_cfg=LVEval_loogle_CR_mixup_reader_cfg,
|
50 |
+
infer_cfg=LVEval_loogle_CR_mixup_infer_cfg,
|
51 |
+
eval_cfg=LVEval_loogle_CR_mixup_eval_cfg,
|
52 |
+
)
|
53 |
+
for name_len in get_dataset_names('loogle_CR_mixup', DATASET_LENGTH_LEVEL)
|
54 |
+
]
|