Mohamed Sana
committed on
Commit
·
5b22a44
1
Parent(s):
b9445e9
update about section
Browse files- README.md +1 -1
- src/about.py +9 -12
README.md
CHANGED
@@ -16,7 +16,7 @@ space_ci:
|
|
16 |
- HF_TOKEN
|
17 |
tags:
|
18 |
- leaderboard
|
19 |
-
short_description: Track, rank and evaluate
|
20 |
---
|
21 |
|
22 |
# Start the configuration
|
|
|
16 |
- HF_TOKEN
|
17 |
tags:
|
18 |
- leaderboard
|
19 |
+
short_description: Track, rank and evaluate Open Telecom LLMs and chatbots
|
20 |
---
|
21 |
|
22 |
# Start the configuration
|
src/about.py
CHANGED
@@ -11,12 +11,9 @@ class Task:
|
|
11 |
# ---------------------------------------------------
|
12 |
class Tasks(Enum):
|
13 |
# # task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
14 |
-
tsg_avg = Task("custom|3gpp:tsg|0", "em", "TSG
|
15 |
tele_EQ = Task("custom|telecom:math|0", "em", "TELE-EQ")
|
16 |
-
|
17 |
-
# tsg_ct = Task("3gpp|tsg_ct:_average|0", "acc", "TSG-CT")
|
18 |
-
# tele_EQ = Task("tii|tele_EQ:_average|0", "cosine_similarity", "TELE-EQ")
|
19 |
-
# tele_QnA = Task("huawei|tele_QnA:_average|0", "acc", "TELE-QnA")
|
20 |
|
21 |
|
22 |
NUM_FEWSHOT = 0 # Change with your few shot
|
@@ -32,14 +29,14 @@ BOTTOM_LOGO = """<img src="https://avatars.githubusercontent.com/u/148767883?v=4
|
|
32 |
|
33 |
# What does your leaderboard evaluate?
|
34 |
INTRODUCTION_TEXT = """
|
35 |
-
🌐 The Open
|
36 |
|
37 |
|
38 |
When you submit a model on the "Submit here!" page, it is automatically evaluated on a set of benchmarks.
|
39 |
|
40 |
The GPU used for evaluation is operated with the support of __[Huawei Technologies France](https://www.huawei.com/)__, __[Technology Innovation Institute (TII)](https://www.tii.ae/)__, and __[GSM Association (GSMA)](https://www.gsma.com/)__.
|
41 |
|
42 |
-
The datasets used for evaluation
|
43 |
|
44 |
More details about the benchmarks and the evaluation process are provided on the “About” page.
|
45 |
"""
|
@@ -129,12 +126,12 @@ If everything is done, check you can launch the LightEval script on your model l
|
|
129 |
|
130 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
131 |
CITATION_BUTTON_TEXT = r"""
|
132 |
-
@misc{
|
133 |
-
author = {
|
134 |
-
title = {Open
|
135 |
year = {2024},
|
136 |
-
publisher = {
|
137 |
-
howpublished = "\url{https://huggingface.co/spaces/
|
138 |
}
|
139 |
|
140 |
@article{maatouk2023teleqna,
|
|
|
11 |
# ---------------------------------------------------
|
12 |
class Tasks(Enum):
|
13 |
# # task_key in the json file, metric_key in the json file, name to display in the leaderboard
|
14 |
+
tsg_avg = Task("custom|3gpp:tsg|0", "em", "3GPP-TSG")
|
15 |
tele_EQ = Task("custom|telecom:math|0", "em", "TELE-EQ")
|
16 |
+
tele_QnA = Task("custom|telecom:qna|0", "em", "TELE-QnA")
|
|
|
|
|
|
|
17 |
|
18 |
|
19 |
NUM_FEWSHOT = 0 # Change with your few shot
|
|
|
29 |
|
30 |
# What does your leaderboard evaluate?
|
31 |
INTRODUCTION_TEXT = """
|
32 |
+
🌐 The Open Telecom LLM Leaderboard : Evaluate and compare the performance of Telecom Large Language Models (LLMs).
|
33 |
|
34 |
|
35 |
When you submit a model on the "Submit here!" page, it is automatically evaluated on a set of benchmarks.
|
36 |
|
37 |
The GPU used for evaluation is operated with the support of __[Huawei Technologies France](https://www.huawei.com/)__, __[Technology Innovation Institute (TII)](https://www.tii.ae/)__, and __[GSM Association (GSMA)](https://www.gsma.com/)__.
|
38 |
|
39 |
+
The datasets used for evaluation are the `TeleQnA` benchmark from [TeleQnA](https://github.com/netop-team/TeleQnA), `TeleEQ` benchmark from [TeleEQ](https://arxiv.org/pdf/2407.09424), and `3GPP-TSG` benchmark from [3GPP-TSG](https://arxiv.org/pdf/2407.09424) to assess reasoning, language understanding, commonsense, and more.
|
40 |
|
41 |
More details about the benchmarks and the evaluation process are provided on the “About” page.
|
42 |
"""
|
|
|
126 |
|
127 |
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
|
128 |
CITATION_BUTTON_TEXT = r"""
|
129 |
+
@misc{otellm,
|
130 |
+
author = {Sana, Mohamed and De Domenico, Antonio and Debbah, Merouane and Zhao, Qiyang},
|
131 |
+
title = {Open Telecom LLM Leaderboard},
|
132 |
year = {2024},
|
133 |
+
publisher = {otellm},
|
134 |
+
howpublished = "\url{https://huggingface.co/spaces/otellm/Open-Telecom-LLM-Leaderboard}"
|
135 |
}
|
136 |
|
137 |
@article{maatouk2023teleqna,
|