Mohamed Sana committed on
Commit 5b22a44 · 1 Parent(s): b9445e9

update about section

Files changed (2)
  1. README.md +1 -1
  2. src/about.py +9 -12
README.md CHANGED
@@ -16,7 +16,7 @@ space_ci:
   - HF_TOKEN
  tags:
   - leaderboard
- short_description: Track, rank and evaluate open Arabic LLMs and chatbots
+ short_description: Track, rank and evaluate Open Telecom LLMs and chatbots
  ---

  # Start the configuration
src/about.py CHANGED
@@ -11,12 +11,9 @@ class Task:
  # ---------------------------------------------------
  class Tasks(Enum):
  # # task_key in the json file, metric_key in the json file, name to display in the leaderboard
- tsg_avg = Task("custom|3gpp:tsg|0", "em", "TSG-AVG")
+ tsg_avg = Task("custom|3gpp:tsg|0", "em", "3GPP-TSG")
  tele_EQ = Task("custom|telecom:math|0", "em", "TELE-EQ")
- # tsg_sa = Task("3gpp|tsg_sa:_average|0", "acc", "TSG-SA")
- # tsg_ct = Task("3gpp|tsg_ct:_average|0", "acc", "TSG-CT")
- # tele_EQ = Task("tii|tele_EQ:_average|0", "cosine_similarity", "TELE-EQ")
- # tele_QnA = Task("huawei|tele_QnA:_average|0", "acc", "TELE-QnA")
+ tele_QnA = Task("custom|telecom:qna|0", "em", "TELE-QnA")


  NUM_FEWSHOT = 0 # Change with your few shot
@@ -32,14 +29,14 @@ BOTTOM_LOGO = """<img src="https://avatars.githubusercontent.com/u/148767883?v=4

  # What does your leaderboard evaluate?
  INTRODUCTION_TEXT = """
- 🌐 The Open TELCOM LLM Leaderboard : Evaluate and compare the performance of Telecom Large Language Models (LLMs).
+ 🌐 The Open Telecom LLM Leaderboard : Evaluate and compare the performance of Telecom Large Language Models (LLMs).


  When you submit a model on the "Submit here!" page, it is automatically evaluated on a set of benchmarks.

  The GPU used for evaluation is operated with the support of __[Huawei Technologies France](https://www.huawei.com/)__, __[Technology Innovation Institute (TII)](https://www.tii.ae/)__, and __[GSM Association (GSMA)](https://www.gsma.com/)__.

- The datasets used for evaluation consist of datasets that are the `TeleQna` benchmark from [TeleQna](https://github.com/netop-team/TeleQnA) and `BENCHMARK` benchmark from [BENCHMARK_HUB](https://benchmarkwebsite.com) to assess reasoning, language understanding, commonsense, and more.
+ The datasets used for evaluation are the `TeleQnA` benchmark from [TeleQnA](https://github.com/netop-team/TeleQnA), `TeleEQ` benchmark from [TeleEQ](https://arxiv.org/pdf/2407.09424), and `3GPP-TSG` benchmark from [3GPP-TSG](https://arxiv.org/pdf/2407.09424) to assess reasoning, language understanding, commonsense, and more.

  More details about the benchmarks and the evaluation process is provided on the “About” page.
  """
@@ -129,12 +126,12 @@ If everything is done, check you can launch the LightEval script on your model l

  CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
  CITATION_BUTTON_TEXT = r"""
- @misc{Netop,
- author = {xxxx, },
- title = {Open Telco LLM Leaderboard},
+ @misc{otellm,
+ author = {Sana, Mohamed and De Domenico, Antonio and Debbah, Merouane and Zhao, Qiyang},
+ title = {Open Telecom LLM Leaderboard},
  year = {2024},
- publisher = {Netop},
- howpublished = "\url{https://huggingface.co/spaces/netop/Open-Telecom-LLM-Leaderboard}"
+ publisher = {otellm},
+ howpublished = "\url{https://huggingface.co/spaces/otellm/Open-Telecom-LLM-Leaderboard}"
  }

  @article{maatouk2023teleqna,
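For context on the `Tasks` hunk above: each entry packs three positional fields, which the inline comment describes as the task key in the results JSON, the metric key, and the display name. The `class Task:` definition itself is outside this diff, so the sketch below is an assumption based on that comment and on the standard Hugging Face leaderboard template, not the repository's exact source.

```python
# Minimal sketch of the Task container assumed by the Tasks enum in src/about.py.
# Field names follow the common leaderboard template; they are not confirmed by this diff.
from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str  # task key as it appears in the results JSON, e.g. "custom|3gpp:tsg|0"
    metric: str     # metric key read from that entry, e.g. "em" (exact match)
    col_name: str   # column header shown in the leaderboard, e.g. "3GPP-TSG"


class Tasks(Enum):
    # Mirrors the three entries after this commit: 3GPP-TSG renamed, TELE-QnA added.
    tsg_avg = Task("custom|3gpp:tsg|0", "em", "3GPP-TSG")
    tele_EQ = Task("custom|telecom:math|0", "em", "TELE-EQ")
    tele_QnA = Task("custom|telecom:qna|0", "em", "TELE-QnA")
```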