Arabic-Leaderboards / assets /results /aragen_v2_results.json
alielfilali01's picture
Rename assets/results/results.json to assets/results/aragen_v2_results.json
45d46d1 verified
raw
history blame
71.1 kB
[
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4882,
"Completeness": 0.4755,
"Conciseness": 0.1973,
"Helpfulness": 0.4659,
"Honesty": 0.4711,
"Harmlessness": 0.4875,
"3C3H Score": 0.4309
},
"Tasks Scores": {
"Question Answering (QA)": 0.2919,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.7292,
"Reasoning": 0.8423
}
},
"Meta": {
"Model Name": "Qwen/Qwen2.5-72B-Instruct",
"License": "qwen",
"Revision": "main",
"Precision": "bfloat16",
"Params": 72.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4892,
"Completeness": 0.4451,
"Conciseness": 0.324,
"Helpfulness": 0.4667,
"Honesty": 0.4738,
"Harmlessness": 0.4885,
"3C3H Score": 0.4479
},
"Tasks Scores": {
"Question Answering (QA)": 0.2968,
"Orthographic and Grammatical Analysis": 0.0958,
"Safety": 0.951,
"Reasoning": 0.7429
}
},
"Meta": {
"Model Name": "claude-3-5-haiku-20241022",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.6049,
"Completeness": 0.5667,
"Conciseness": 0.3914,
"Helpfulness": 0.586,
"Honesty": 0.585,
"Harmlessness": 0.602,
"3C3H Score": 0.556
},
"Tasks Scores": {
"Question Answering (QA)": 0.4152,
"Orthographic and Grammatical Analysis": 0.3625,
"Safety": 0.9687,
"Reasoning": 0.8054
}
},
"Meta": {
"Model Name": "claude-3-5-sonnet-20241022",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.6225,
"Completeness": 0.5853,
"Conciseness": 0.3449,
"Helpfulness": 0.6039,
"Honesty": 0.614,
"Harmlessness": 0.6218,
"3C3H Score": 0.5654
},
"Tasks Scores": {
"Question Answering (QA)": 0.4179,
"Orthographic and Grammatical Analysis": 0.4042,
"Safety": 0.8698,
"Reasoning": 0.8821
}
},
"Meta": {
"Model Name": "claude-3-7-sonnet-20250219",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.5755,
"Completeness": 0.5392,
"Conciseness": 0.2561,
"Helpfulness": 0.5495,
"Honesty": 0.5642,
"Harmlessness": 0.5755,
"3C3H Score": 0.51
},
"Tasks Scores": {
"Question Answering (QA)": 0.4041,
"Orthographic and Grammatical Analysis": 0.1833,
"Safety": 0.7,
"Reasoning": 0.8441
}
},
"Meta": {
"Model Name": "deepseek-chat",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.6314,
"Completeness": 0.5667,
"Conciseness": 0.3995,
"Helpfulness": 0.5966,
"Honesty": 0.6179,
"Harmlessness": 0.6306,
"3C3H Score": 0.5738
},
"Tasks Scores": {
"Question Answering (QA)": 0.4704,
"Orthographic and Grammatical Analysis": 0.2306,
"Safety": 0.9021,
"Reasoning": 0.8286
}
},
"Meta": {
"Model Name": "gpt-4o-2024-08-06",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.451,
"Completeness": 0.4088,
"Conciseness": 0.276,
"Helpfulness": 0.4206,
"Honesty": 0.4358,
"Harmlessness": 0.4451,
"3C3H Score": 0.4062
},
"Tasks Scores": {
"Question Answering (QA)": 0.2562,
"Orthographic and Grammatical Analysis": 0.0361,
"Safety": 0.8677,
"Reasoning": 0.7298
}
},
"Meta": {
"Model Name": "gpt-4o-mini-2024-07-18",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.7588,
"Completeness": 0.7098,
"Conciseness": 0.5125,
"Helpfulness": 0.7255,
"Honesty": 0.7525,
"Harmlessness": 0.7559,
"3C3H Score": 0.7025
},
"Tasks Scores": {
"Question Answering (QA)": 0.6051,
"Orthographic and Grammatical Analysis": 0.4528,
"Safety": 0.9437,
"Reasoning": 0.95
}
},
"Meta": {
"Model Name": "o1-2024-12-17",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4755,
"Completeness": 0.4676,
"Conciseness": 0.2804,
"Helpfulness": 0.4627,
"Honesty": 0.4667,
"Harmlessness": 0.474,
"3C3H Score": 0.4378
},
"Tasks Scores": {
"Question Answering (QA)": 0.2435,
"Orthographic and Grammatical Analysis": 0.0292,
"Safety": 0.8958,
"Reasoning": 0.9065
}
},
"Meta": {
"Model Name": "o1-mini-2024-09-12",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.5608,
"Completeness": 0.5235,
"Conciseness": 0.3672,
"Helpfulness": 0.5353,
"Honesty": 0.551,
"Harmlessness": 0.56,
"3C3H Score": 0.5163
},
"Tasks Scores": {
"Question Answering (QA)": 0.3458,
"Orthographic and Grammatical Analysis": 0.0875,
"Safety": 0.9448,
"Reasoning": 0.9423
}
},
"Meta": {
"Model Name": "o3-mini-2025-01-31",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3088,
"Completeness": 0.2461,
"Conciseness": 0.1998,
"Helpfulness": 0.2674,
"Honesty": 0.2956,
"Harmlessness": 0.3081,
"3C3H Score": 0.271
},
"Tasks Scores": {
"Question Answering (QA)": 0.1979,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.7854,
"Reasoning": 0.3018
}
},
"Meta": {
"Model Name": "Mohaddz/Thinking-Camel-7b",
"License": "Open",
"Revision": "main",
"Precision": "float16",
"Params": 7.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3735,
"Completeness": 0.3539,
"Conciseness": 0.1699,
"Helpfulness": 0.3554,
"Honesty": 0.3625,
"Harmlessness": 0.3735,
"3C3H Score": 0.3315
},
"Tasks Scores": {
"Question Answering (QA)": 0.1528,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.7521,
"Reasoning": 0.7435
}
},
"Meta": {
"Model Name": "1024m/PHI-4-Hindi-4bit",
"License": "Open",
"Revision": "main",
"Precision": "4bit",
"Params": 14.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3147,
"Completeness": 0.2529,
"Conciseness": 0.2027,
"Helpfulness": 0.2713,
"Honesty": 0.2988,
"Harmlessness": 0.3088,
"3C3H Score": 0.2749
},
"Tasks Scores": {
"Question Answering (QA)": 0.1996,
"Orthographic and Grammatical Analysis": 0.0056,
"Safety": 0.7625,
"Reasoning": 0.3268
}
},
"Meta": {
"Model Name": "ALLaM-AI/ALLaM-7B-Instruct-preview",
"License": "apache-2.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 7.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.2451,
"Completeness": 0.2059,
"Conciseness": 0.1282,
"Helpfulness": 0.2088,
"Honesty": 0.2375,
"Harmlessness": 0.2436,
"3C3H Score": 0.2115
},
"Tasks Scores": {
"Question Answering (QA)": 0.1927,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.4146,
"Reasoning": 0.2399
}
},
"Meta": {
"Model Name": "CohereForAI/aya-23-35B",
"License": "cc-by-nc-4.0",
"Revision": "main",
"Precision": "float16",
"Params": 35.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.1765,
"Completeness": 0.1461,
"Conciseness": 0.0929,
"Helpfulness": 0.1502,
"Honesty": 0.1725,
"Harmlessness": 0.1757,
"3C3H Score": 0.1523
},
"Tasks Scores": {
"Question Answering (QA)": 0.1296,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.4844,
"Reasoning": 0.0929
}
},
"Meta": {
"Model Name": "CohereForAI/aya-23-8B",
"License": "cc-by-nc-4.0",
"Revision": "main",
"Precision": "float16",
"Params": 8.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3795,
"Completeness": 0.3618,
"Conciseness": 0.1401,
"Helpfulness": 0.3545,
"Honesty": 0.3582,
"Harmlessness": 0.3744,
"3C3H Score": 0.3281
},
"Tasks Scores": {
"Question Answering (QA)": 0.2394,
"Orthographic and Grammatical Analysis": 0.0556,
"Safety": 0.6823,
"Reasoning": 0.4946
}
},
"Meta": {
"Model Name": "CohereForAI/aya-expanse-32b",
"License": "cc-by-nc-4.0",
"Revision": "main",
"Precision": "float16",
"Params": 32.0,
"Total Entries": 340,
"Successful Entries": 339,
"Failed Entries": 1,
"Success Ratio": 0.9971
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3029,
"Completeness": 0.2882,
"Conciseness": 0.1022,
"Helpfulness": 0.2841,
"Honesty": 0.2902,
"Harmlessness": 0.3015,
"3C3H Score": 0.2615
},
"Tasks Scores": {
"Question Answering (QA)": 0.174,
"Orthographic and Grammatical Analysis": 0.0319,
"Safety": 0.6531,
"Reasoning": 0.3863
}
},
"Meta": {
"Model Name": "CohereForAI/aya-expanse-8b",
"License": "cc-by-nc-4.0",
"Revision": "main",
"Precision": "float16",
"Params": 8.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.5412,
"Completeness": 0.5275,
"Conciseness": 0.2047,
"Helpfulness": 0.5284,
"Honesty": 0.5287,
"Harmlessness": 0.5397,
"3C3H Score": 0.4783
},
"Tasks Scores": {
"Question Answering (QA)": 0.3701,
"Orthographic and Grammatical Analysis": 0.1444,
"Safety": 0.7604,
"Reasoning": 0.7696
}
},
"Meta": {
"Model Name": "CohereForAI/c4ai-command-a-03-2025",
"License": "cc-by-nc-4.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 111.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3235,
"Completeness": 0.2742,
"Conciseness": 0.162,
"Helpfulness": 0.2818,
"Honesty": 0.3119,
"Harmlessness": 0.3235,
"3C3H Score": 0.2795
},
"Tasks Scores": {
"Question Answering (QA)": 0.2439,
"Orthographic and Grammatical Analysis": 0.0333,
"Safety": 0.4042,
"Reasoning": 0.4143
}
},
"Meta": {
"Model Name": "CohereForAI/c4ai-command-r-08-2024",
"License": "cc-by-nc-4.0",
"Revision": "main",
"Precision": "float16",
"Params": 32.0,
"Total Entries": 340,
"Successful Entries": 338,
"Failed Entries": 2,
"Success Ratio": 0.9941
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3529,
"Completeness": 0.3137,
"Conciseness": 0.1652,
"Helpfulness": 0.3069,
"Honesty": 0.3363,
"Harmlessness": 0.3485,
"3C3H Score": 0.3039
},
"Tasks Scores": {
"Question Answering (QA)": 0.2773,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.3646,
"Reasoning": 0.4756
}
},
"Meta": {
"Model Name": "CohereForAI/c4ai-command-r-plus-08-2024",
"License": "cc-by-nc-4.0",
"Revision": "main",
"Precision": "float16",
"Params": 104.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3667,
"Completeness": 0.302,
"Conciseness": 0.1968,
"Helpfulness": 0.3132,
"Honesty": 0.3559,
"Harmlessness": 0.3667,
"3C3H Score": 0.3169
},
"Tasks Scores": {
"Question Answering (QA)": 0.2866,
"Orthographic and Grammatical Analysis": 0.0639,
"Safety": 0.6469,
"Reasoning": 0.3232
}
},
"Meta": {
"Model Name": "CohereForAI/c4ai-command-r-plus",
"License": "cc-by-nc-4.0",
"Revision": "main",
"Precision": "float16",
"Params": 104.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.2517,
"Completeness": 0.2104,
"Conciseness": 0.115,
"Helpfulness": 0.2099,
"Honesty": 0.237,
"Harmlessness": 0.2495,
"3C3H Score": 0.2123
},
"Tasks Scores": {
"Question Answering (QA)": 0.2255,
"Orthographic and Grammatical Analysis": 0.0333,
"Safety": 0.2937,
"Reasoning": 0.2048
}
},
"Meta": {
"Model Name": "CohereForAI/c4ai-command-r-v01",
"License": "cc-by-nc-4.0",
"Revision": "main",
"Precision": "float16",
"Params": 35.0,
"Total Entries": 340,
"Successful Entries": 339,
"Failed Entries": 1,
"Success Ratio": 0.9971
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4569,
"Completeness": 0.452,
"Conciseness": 0.1904,
"Helpfulness": 0.4365,
"Honesty": 0.4373,
"Harmlessness": 0.4554,
"3C3H Score": 0.4047
},
"Tasks Scores": {
"Question Answering (QA)": 0.2712,
"Orthographic and Grammatical Analysis": 0.0278,
"Safety": 0.8031,
"Reasoning": 0.7202
}
},
"Meta": {
"Model Name": "MaziyarPanahi/calme-2.1-qwen2.5-72b",
"License": "tongyi-qianwen",
"Revision": "main",
"Precision": "bfloat16",
"Params": 72.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4745,
"Completeness": 0.4716,
"Conciseness": 0.2025,
"Helpfulness": 0.4603,
"Honesty": 0.4581,
"Harmlessness": 0.4745,
"3C3H Score": 0.4236
},
"Tasks Scores": {
"Question Answering (QA)": 0.2809,
"Orthographic and Grammatical Analysis": 0.0542,
"Safety": 0.8011,
"Reasoning": 0.7738
}
},
"Meta": {
"Model Name": "MaziyarPanahi/calme-2.2-qwen2.5-72b",
"License": "tongyi-qianwen",
"Revision": "main",
"Precision": "bfloat16",
"Params": 72.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3108,
"Completeness": 0.2471,
"Conciseness": 0.2005,
"Helpfulness": 0.2672,
"Honesty": 0.299,
"Harmlessness": 0.31,
"3C3H Score": 0.2724
},
"Tasks Scores": {
"Question Answering (QA)": 0.2002,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.7865,
"Reasoning": 0.3018
}
},
"Meta": {
"Model Name": "Mohaddz/Thinking-cow-7B",
"License": "Apache license 2.0",
"Revision": "main",
"Precision": "float16",
"Params": 7.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3275,
"Completeness": 0.2284,
"Conciseness": 0.2463,
"Helpfulness": 0.2613,
"Honesty": 0.3159,
"Harmlessness": 0.3275,
"3C3H Score": 0.2845
},
"Tasks Scores": {
"Question Answering (QA)": 0.2005,
"Orthographic and Grammatical Analysis": 0.0444,
"Safety": 0.8302,
"Reasoning": 0.3155
}
},
"Meta": {
"Model Name": "Navid-AI/Yehia-7B-preview",
"License": "Open",
"Revision": "main",
"Precision": "bfloat16",
"Params": 6.524,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.2301,
"Completeness": 0.2173,
"Conciseness": 0.0376,
"Helpfulness": 0.1323,
"Honesty": 0.2117,
"Harmlessness": 0.2107,
"3C3H Score": 0.1733
},
"Tasks Scores": {
"Question Answering (QA)": 0.0706,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.5365,
"Reasoning": 0.3358
}
},
"Meta": {
"Model Name": "Qwen/QwQ-32B-Preview",
"License": "apache-2.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 32.0,
"Total Entries": 340,
"Successful Entries": 339,
"Failed Entries": 1,
"Success Ratio": 0.9971
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3088,
"Completeness": 0.3069,
"Conciseness": 0.0137,
"Helpfulness": 0.223,
"Honesty": 0.2953,
"Harmlessness": 0.3074,
"3C3H Score": 0.2425
},
"Tasks Scores": {
"Question Answering (QA)": 0.149,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.1906,
"Reasoning": 0.6435
}
},
"Meta": {
"Model Name": "Qwen/QwQ-32B",
"License": "apache-2.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 32.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.0944,
"Completeness": 0.0855,
"Conciseness": 0.0339,
"Helpfulness": 0.0723,
"Honesty": 0.0819,
"Harmlessness": 0.0878,
"3C3H Score": 0.076
},
"Tasks Scores": {
"Question Answering (QA)": 0.0469,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.399,
"Reasoning": 0.0065
}
},
"Meta": {
"Model Name": "Qwen/Qwen2.5-0.5B-Instruct",
"License": "apache-2.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 0.465,
"Total Entries": 340,
"Successful Entries": 339,
"Failed Entries": 1,
"Success Ratio": 0.9971
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.1882,
"Completeness": 0.1882,
"Conciseness": 0.1096,
"Helpfulness": 0.1596,
"Honesty": 0.1846,
"Harmlessness": 0.1846,
"3C3H Score": 0.1691
},
"Tasks Scores": {
"Question Answering (QA)": 0.0465,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.6979,
"Reasoning": 0.2899
}
},
"Meta": {
"Model Name": "Qwen/Qwen2.5-1.5B-Instruct",
"License": "qwen",
"Revision": "main",
"Precision": "bfloat16",
"Params": 1.443,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3833,
"Completeness": 0.3647,
"Conciseness": 0.1978,
"Helpfulness": 0.3652,
"Honesty": 0.376,
"Harmlessness": 0.3826,
"3C3H Score": 0.3449
},
"Tasks Scores": {
"Question Answering (QA)": 0.1585,
"Orthographic and Grammatical Analysis": 0.0306,
"Safety": 0.8281,
"Reasoning": 0.7363
}
},
"Meta": {
"Model Name": "Qwen/Qwen2.5-14B-Instruct",
"License": "apache-2.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 14.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4235,
"Completeness": 0.3922,
"Conciseness": 0.2162,
"Helpfulness": 0.3971,
"Honesty": 0.4132,
"Harmlessness": 0.4223,
"3C3H Score": 0.3774
},
"Tasks Scores": {
"Question Answering (QA)": 0.2031,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.8188,
"Reasoning": 0.7851
}
},
"Meta": {
"Model Name": "Qwen/Qwen2.5-32B-Instruct",
"License": "apache-2.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 32.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.2598,
"Completeness": 0.2598,
"Conciseness": 0.1304,
"Helpfulness": 0.2431,
"Honesty": 0.2559,
"Harmlessness": 0.2561,
"3C3H Score": 0.2342
},
"Tasks Scores": {
"Question Answering (QA)": 0.0665,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.8646,
"Reasoning": 0.4536
}
},
"Meta": {
"Model Name": "Qwen/Qwen2.5-3B-Instruct",
"License": "apache-2.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 3.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3304,
"Completeness": 0.2832,
"Conciseness": 0.1927,
"Helpfulness": 0.2898,
"Honesty": 0.3142,
"Harmlessness": 0.3267,
"3C3H Score": 0.2895
},
"Tasks Scores": {
"Question Answering (QA)": 0.2124,
"Orthographic and Grammatical Analysis": 0.0194,
"Safety": 0.8448,
"Reasoning": 0.3071
}
},
"Meta": {
"Model Name": "inceptionai/jais-adapted-13b-chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float32",
"Params": 13.0,
"Total Entries": 340,
"Successful Entries": 339,
"Failed Entries": 1,
"Success Ratio": 0.9971
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4206,
"Completeness": 0.3716,
"Conciseness": 0.1875,
"Helpfulness": 0.3752,
"Honesty": 0.3912,
"Harmlessness": 0.4199,
"3C3H Score": 0.361
},
"Tasks Scores": {
"Question Answering (QA)": 0.2878,
"Orthographic and Grammatical Analysis": 0.0306,
"Safety": 0.8188,
"Reasoning": 0.45
}
},
"Meta": {
"Model Name": "inceptionai/jais-adapted-70b-chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float32",
"Params": 70.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.2627,
"Completeness": 0.2392,
"Conciseness": 0.1206,
"Helpfulness": 0.2424,
"Honesty": 0.2468,
"Harmlessness": 0.2627,
"3C3H Score": 0.2291
},
"Tasks Scores": {
"Question Answering (QA)": 0.1511,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.7479,
"Reasoning": 0.2536
}
},
"Meta": {
"Model Name": "inceptionai/jais-family-13b-chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float32",
"Params": 13.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.2108,
"Completeness": 0.1971,
"Conciseness": 0.077,
"Helpfulness": 0.1828,
"Honesty": 0.189,
"Harmlessness": 0.2064,
"3C3H Score": 0.1772
},
"Tasks Scores": {
"Question Answering (QA)": 0.111,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.7052,
"Reasoning": 0.1405
}
},
"Meta": {
"Model Name": "inceptionai/jais-family-2p7b-chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float32",
"Params": 3.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3048,
"Completeness": 0.2793,
"Conciseness": 0.1362,
"Helpfulness": 0.2778,
"Honesty": 0.282,
"Harmlessness": 0.3041,
"3C3H Score": 0.264
},
"Tasks Scores": {
"Question Answering (QA)": 0.1863,
"Orthographic and Grammatical Analysis": 0.0222,
"Safety": 0.7521,
"Reasoning": 0.3095
}
},
"Meta": {
"Model Name": "inceptionai/jais-family-30b-16k-chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float32",
"Params": 30.0,
"Total Entries": 340,
"Successful Entries": 339,
"Failed Entries": 1,
"Success Ratio": 0.9971
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.2784,
"Completeness": 0.2569,
"Conciseness": 0.1275,
"Helpfulness": 0.2485,
"Honesty": 0.2632,
"Harmlessness": 0.2755,
"3C3H Score": 0.2417
},
"Tasks Scores": {
"Question Answering (QA)": 0.1665,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.7177,
"Reasoning": 0.2881
}
},
"Meta": {
"Model Name": "inceptionai/jais-family-30b-8k-chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float32",
"Params": 30.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.0725,
"Completeness": 0.0637,
"Conciseness": 0.0228,
"Helpfulness": 0.0483,
"Honesty": 0.0556,
"Harmlessness": 0.0713,
"3C3H Score": 0.0557
},
"Tasks Scores": {
"Question Answering (QA)": 0.046,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.174,
"Reasoning": 0.0399
}
},
"Meta": {
"Model Name": "inceptionai/jais-family-590m-chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float32",
"Params": 0.719,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.2275,
"Completeness": 0.1961,
"Conciseness": 0.0995,
"Helpfulness": 0.2029,
"Honesty": 0.2078,
"Harmlessness": 0.2238,
"3C3H Score": 0.1929
},
"Tasks Scores": {
"Question Answering (QA)": 0.1413,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.6208,
"Reasoning": 0.1786
}
},
"Meta": {
"Model Name": "inceptionai/jais-family-6p7b-chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float32",
"Params": 7.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.0029,
"Completeness": 0.0029,
"Conciseness": 0.0,
"Helpfulness": 0.0007,
"Honesty": 0.0029,
"Harmlessness": 0.0029,
"3C3H Score": 0.0021
},
"Tasks Scores": {
"Question Answering (QA)": 0.0035,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.0,
"Reasoning": 0.0
}
},
"Meta": {
"Model Name": "kyutai/helium-1-preview-2b",
"License": "cc-by-4.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 2.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4029,
"Completeness": 0.3804,
"Conciseness": 0.1877,
"Helpfulness": 0.3748,
"Honesty": 0.3882,
"Harmlessness": 0.3983,
"3C3H Score": 0.3554
},
"Tasks Scores": {
"Question Answering (QA)": 0.1775,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.7729,
"Reasoning": 0.7774
}
},
"Meta": {
"Model Name": "maldv/Qwentile2.5-32B-Instruct",
"License": "Open",
"Revision": "main",
"Precision": "float16",
"Params": 32.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3598,
"Completeness": 0.3029,
"Conciseness": 0.2534,
"Helpfulness": 0.3287,
"Honesty": 0.3495,
"Harmlessness": 0.3588,
"3C3H Score": 0.3255
},
"Tasks Scores": {
"Question Answering (QA)": 0.2192,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.8729,
"Reasoning": 0.456
}
},
"Meta": {
"Model Name": "gpt-3.5-turbo-0125",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4876,
"Completeness": 0.4748,
"Conciseness": 0.202,
"Helpfulness": 0.4696,
"Honesty": 0.4716,
"Harmlessness": 0.4874,
"3C3H Score": 0.4322
},
"Tasks Scores": {
"Question Answering (QA)": 0.2962,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.75,
"Reasoning": 0.8185
}
},
"Meta": {
"Model Name": "rombodawg/Rombos-LLM-V2.5-Qwen-72b",
"License": "qwen",
"Revision": "main",
"Precision": "bfloat16",
"Params": 72.0,
"Total Entries": 340,
"Successful Entries": 337,
"Failed Entries": 3,
"Success Ratio": 0.9912
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.2029,
"Completeness": 0.1882,
"Conciseness": 0.1096,
"Helpfulness": 0.1772,
"Honesty": 0.1941,
"Harmlessness": 0.2007,
"3C3H Score": 0.1788
},
"Tasks Scores": {
"Question Answering (QA)": 0.0802,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.7886,
"Reasoning": 0.1887
}
},
"Meta": {
"Model Name": "silma-ai/SILMA-Kashif-2B-Instruct-v1.0",
"License": "Gemma",
"Revision": "main",
"Precision": "bfloat16",
"Params": 2.453,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.1082,
"Completeness": 0.0442,
"Conciseness": 0.0039,
"Helpfulness": 0.0263,
"Honesty": 0.0624,
"Harmlessness": 0.101,
"3C3H Score": 0.0577
},
"Tasks Scores": {
"Question Answering (QA)": 0.0882,
"Orthographic and Grammatical Analysis": 0.0125,
"Safety": 0.0,
"Reasoning": 0.022
}
},
"Meta": {
"Model Name": "stabilityai/ar-stablelm-2-chat",
"License": "other",
"Revision": "main",
"Precision": "float32",
"Params": 2.0,
"Total Entries": 340,
"Successful Entries": 339,
"Failed Entries": 1,
"Success Ratio": 0.9971
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3431,
"Completeness": 0.2892,
"Conciseness": 0.1588,
"Helpfulness": 0.288,
"Honesty": 0.3208,
"Harmlessness": 0.3431,
"3C3H Score": 0.2905
},
"Tasks Scores": {
"Question Answering (QA)": 0.2097,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.8677,
"Reasoning": 0.3161
}
},
"Meta": {
"Model Name": "utter-project/EuroLLM-9B-Instruct",
"License": "apache-2.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 9.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.2363,
"Completeness": 0.2255,
"Conciseness": 0.1157,
"Helpfulness": 0.2238,
"Honesty": 0.2299,
"Harmlessness": 0.2363,
"3C3H Score": 0.2112
},
"Tasks Scores": {
"Question Answering (QA)": 0.1266,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.4261,
"Reasoning": 0.4208
}
},
"Meta": {
"Model Name": "CohereForAI/c4ai-command-r7b-12-2024",
"License": "cc-by-nc-4.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 8.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3206,
"Completeness": 0.3147,
"Conciseness": 0.1387,
"Helpfulness": 0.3103,
"Honesty": 0.3096,
"Harmlessness": 0.3199,
"3C3H Score": 0.2856
},
"Tasks Scores": {
"Question Answering (QA)": 0.1514,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.6552,
"Reasoning": 0.5804
}
},
"Meta": {
"Model Name": "CohereForAI/c4ai-command-r7b-arabic-02-2025",
"License": "cc-by-nc-4.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 8.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.1765,
"Completeness": 0.0931,
"Conciseness": 0.1333,
"Helpfulness": 0.1201,
"Honesty": 0.1681,
"Harmlessness": 0.175,
"3C3H Score": 0.1444
},
"Tasks Scores": {
"Question Answering (QA)": 0.1533,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.3083,
"Reasoning": 0.0869
}
},
"Meta": {
"Model Name": "FreedomIntelligence/AceGPT-v1.5-13B-Chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float32",
"Params": 13.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3598,
"Completeness": 0.2961,
"Conciseness": 0.2625,
"Helpfulness": 0.3208,
"Honesty": 0.3532,
"Harmlessness": 0.3591,
"3C3H Score": 0.3252
},
"Tasks Scores": {
"Question Answering (QA)": 0.1946,
"Orthographic and Grammatical Analysis": 0.0333,
"Safety": 0.9083,
"Reasoning": 0.4905
}
},
"Meta": {
"Model Name": "FreedomIntelligence/AceGPT-v2-32B-Chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float16",
"Params": 32.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4343,
"Completeness": 0.3235,
"Conciseness": 0.3216,
"Helpfulness": 0.3755,
"Honesty": 0.424,
"Harmlessness": 0.4336,
"3C3H Score": 0.3854
},
"Tasks Scores": {
"Question Answering (QA)": 0.3131,
"Orthographic and Grammatical Analysis": 0.025,
"Safety": 0.8875,
"Reasoning": 0.4595
}
},
"Meta": {
"Model Name": "FreedomIntelligence/AceGPT-v2-70B-Chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float16",
"Params": 70.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3275,
"Completeness": 0.3108,
"Conciseness": 0.1395,
"Helpfulness": 0.3081,
"Honesty": 0.3174,
"Harmlessness": 0.326,
"3C3H Score": 0.2882
},
"Tasks Scores": {
"Question Answering (QA)": 0.1199,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.7729,
"Reasoning": 0.6155
}
},
"Meta": {
"Model Name": "Qwen/Qwen2.5-7B-Instruct",
"License": "apache-2.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 7.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4098,
"Completeness": 0.3539,
"Conciseness": 0.2368,
"Helpfulness": 0.3792,
"Honesty": 0.3887,
"Harmlessness": 0.4098,
"3C3H Score": 0.363
},
"Tasks Scores": {
"Question Answering (QA)": 0.2707,
"Orthographic and Grammatical Analysis": 0.0514,
"Safety": 0.8927,
"Reasoning": 0.4577
}
},
"Meta": {
"Model Name": "claude-3-haiku-20240307",
"License": "Proprietary",
"Revision": "UNK",
"Precision": "UNK",
"Params": "UNK",
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3931,
"Completeness": 0.3765,
"Conciseness": 0.211,
"Helpfulness": 0.377,
"Honesty": 0.3843,
"Harmlessness": 0.3931,
"3C3H Score": 0.3558
},
"Tasks Scores": {
"Question Answering (QA)": 0.2201,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.8865,
"Reasoning": 0.5929
}
},
"Meta": {
"Model Name": "google/gemma-2-27b-it",
"License": "gemma",
"Revision": "main",
"Precision": "bfloat16",
"Params": 27.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3343,
"Completeness": 0.3196,
"Conciseness": 0.1861,
"Helpfulness": 0.323,
"Honesty": 0.3294,
"Harmlessness": 0.3336,
"3C3H Score": 0.3043
},
"Tasks Scores": {
"Question Answering (QA)": 0.1633,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.8875,
"Reasoning": 0.5072
}
},
"Meta": {
"Model Name": "google/gemma-2-9b-it",
"License": "gemma",
"Revision": "main",
"Precision": "bfloat16",
"Params": 9.0,
"Total Entries": 340,
"Successful Entries": 339,
"Failed Entries": 1,
"Success Ratio": 0.9971
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4888,
"Completeness": 0.4792,
"Conciseness": 0.1976,
"Helpfulness": 0.4662,
"Honesty": 0.4702,
"Harmlessness": 0.488,
"3C3H Score": 0.4317
},
"Tasks Scores": {
"Question Answering (QA)": 0.2443,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.7927,
"Reasoning": 0.8
}
},
"Meta": {
"Model Name": "google/gemma-3-12b-it",
"License": "gemma",
"Revision": "main",
"Precision": "bfloat16",
"Params": 12.0,
"Total Entries": 340,
"Successful Entries": 313,
"Failed Entries": 27,
"Success Ratio": 0.9206
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.2101,
"Completeness": 0.2041,
"Conciseness": 0.0466,
"Helpfulness": 0.1834,
"Honesty": 0.1997,
"Harmlessness": 0.2034,
"3C3H Score": 0.1746
},
"Tasks Scores": {
"Question Answering (QA)": 0.0694,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.7292,
"Reasoning": 0.2298
}
},
"Meta": {
"Model Name": "google/gemma-3-1b-it",
"License": "gemma",
"Revision": "main",
"Precision": "bfloat16",
"Params": 1.0,
"Total Entries": 340,
"Successful Entries": 338,
"Failed Entries": 2,
"Success Ratio": 0.9941
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.5231,
"Completeness": 0.5064,
"Conciseness": 0.1868,
"Helpfulness": 0.4939,
"Honesty": 0.5044,
"Harmlessness": 0.5172,
"3C3H Score": 0.4553
},
"Tasks Scores": {
"Question Answering (QA)": 0.3213,
"Orthographic and Grammatical Analysis": 0.0292,
"Safety": 0.7724,
"Reasoning": 0.8441
}
},
"Meta": {
"Model Name": "google/gemma-3-27b-it",
"License": "gemma",
"Revision": "main",
"Precision": "bfloat16",
"Params": 27.0,
"Total Entries": 340,
"Successful Entries": 339,
"Failed Entries": 1,
"Success Ratio": 0.9971
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3392,
"Completeness": 0.3363,
"Conciseness": 0.1088,
"Helpfulness": 0.3186,
"Honesty": 0.3316,
"Harmlessness": 0.337,
"3C3H Score": 0.2953
},
"Tasks Scores": {
"Question Answering (QA)": 0.1067,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.8229,
"Reasoning": 0.6589
}
},
"Meta": {
"Model Name": "google/gemma-3-4b-it",
"License": "gemma",
"Revision": "main",
"Precision": "bfloat16",
"Params": 4.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.1667,
"Completeness": 0.1627,
"Conciseness": 0.0603,
"Helpfulness": 0.1392,
"Honesty": 0.1439,
"Harmlessness": 0.1615,
"3C3H Score": 0.1391
},
"Tasks Scores": {
"Question Answering (QA)": 0.0885,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.3938,
"Reasoning": 0.1976
}
},
"Meta": {
"Model Name": "inceptionai/jais-family-1p3b-chat",
"License": "apache-2.0",
"Revision": "main",
"Precision": "float32",
"Params": 1.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.3931,
"Completeness": 0.3441,
"Conciseness": 0.2596,
"Helpfulness": 0.361,
"Honesty": 0.3784,
"Harmlessness": 0.3895,
"3C3H Score": 0.3543
},
"Tasks Scores": {
"Question Answering (QA)": 0.2044,
"Orthographic and Grammatical Analysis": 0.0333,
"Safety": 0.8719,
"Reasoning": 0.6244
}
},
"Meta": {
"Model Name": "malhajar/Shahin-v0.1",
"License": "Open",
"Revision": "main",
"Precision": "float16",
"Params": 27.519,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.4225,
"Completeness": 0.3569,
"Conciseness": 0.3252,
"Helpfulness": 0.3777,
"Honesty": 0.4147,
"Harmlessness": 0.4218,
"3C3H Score": 0.3865
},
"Tasks Scores": {
"Question Answering (QA)": 0.2353,
"Orthographic and Grammatical Analysis": 0.025,
"Safety": 0.8542,
"Reasoning": 0.706
}
},
"Meta": {
"Model Name": "meta-llama/Llama-3.1-70B-Instruct",
"License": "llama3.1",
"Revision": "main",
"Precision": "bfloat16",
"Params": 70.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.2971,
"Completeness": 0.2686,
"Conciseness": 0.1968,
"Helpfulness": 0.261,
"Honesty": 0.2814,
"Harmlessness": 0.2971,
"3C3H Score": 0.267
},
"Tasks Scores": {
"Question Answering (QA)": 0.1176,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.8792,
"Reasoning": 0.4583
}
},
"Meta": {
"Model Name": "meta-llama/Llama-3.1-8B-Instruct",
"License": "llama3.1",
"Revision": "main",
"Precision": "bfloat16",
"Params": 8.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.1353,
"Completeness": 0.1176,
"Conciseness": 0.0875,
"Helpfulness": 0.1007,
"Honesty": 0.1213,
"Harmlessness": 0.1301,
"3C3H Score": 0.1154
},
"Tasks Scores": {
"Question Answering (QA)": 0.0479,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.5875,
"Reasoning": 0.0881
}
},
"Meta": {
"Model Name": "meta-llama/Llama-3.2-1B-Instruct",
"License": "llama3.2",
"Revision": "main",
"Precision": "bfloat16",
"Params": 1.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.2468,
"Completeness": 0.2271,
"Conciseness": 0.1657,
"Helpfulness": 0.204,
"Honesty": 0.2335,
"Harmlessness": 0.2424,
"3C3H Score": 0.2199
},
"Tasks Scores": {
"Question Answering (QA)": 0.0782,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.9021,
"Reasoning": 0.3274
}
},
"Meta": {
"Model Name": "meta-llama/Llama-3.2-3B-Instruct",
"License": "llama3.2",
"Revision": "main",
"Precision": "bfloat16",
"Params": 3.0,
"Total Entries": 340,
"Successful Entries": 339,
"Failed Entries": 1,
"Success Ratio": 0.9971
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.448,
"Completeness": 0.3725,
"Conciseness": 0.3586,
"Helpfulness": 0.3939,
"Honesty": 0.4402,
"Harmlessness": 0.4478,
"3C3H Score": 0.4102
},
"Tasks Scores": {
"Question Answering (QA)": 0.2719,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.8792,
"Reasoning": 0.7131
}
},
"Meta": {
"Model Name": "meta-llama/Llama-3.3-70B-Instruct",
"License": "llama3.3",
"Revision": "main",
"Precision": "bfloat16",
"Params": 70.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.0686,
"Completeness": 0.0657,
"Conciseness": 0.036,
"Helpfulness": 0.0615,
"Honesty": 0.0662,
"Harmlessness": 0.0684,
"3C3H Score": 0.0611
},
"Tasks Scores": {
"Question Answering (QA)": 0.044,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.0,
"Reasoning": 0.1708
}
},
"Meta": {
"Model Name": "meta-llama/Meta-Llama-3-70B-Instruct",
"License": "llama3",
"Revision": "main",
"Precision": "bfloat16",
"Params": 70.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.0294,
"Completeness": 0.0294,
"Conciseness": 0.0127,
"Helpfulness": 0.026,
"Honesty": 0.0272,
"Harmlessness": 0.0294,
"3C3H Score": 0.0257
},
"Tasks Scores": {
"Question Answering (QA)": 0.0299,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.0,
"Reasoning": 0.0393
}
},
"Meta": {
"Model Name": "meta-llama/Meta-Llama-3-8B-Instruct",
"License": "llama3",
"Revision": "main",
"Precision": "bfloat16",
"Params": 14.963,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.2667,
"Completeness": 0.2549,
"Conciseness": 0.1257,
"Helpfulness": 0.2368,
"Honesty": 0.2507,
"Harmlessness": 0.2659,
"3C3H Score": 0.2335
},
"Tasks Scores": {
"Question Answering (QA)": 0.1294,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.5042,
"Reasoning": 0.4762
}
},
"Meta": {
"Model Name": "mistralai/Ministral-8B-Instruct-2410",
"License": "mrl",
"Revision": "main",
"Precision": "bfloat16",
"Params": 8.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.0039,
"Completeness": 0.0039,
"Conciseness": 0.0007,
"Helpfulness": 0.0022,
"Honesty": 0.0032,
"Harmlessness": 0.0039,
"3C3H Score": 0.003
},
"Tasks Scores": {
"Question Answering (QA)": 0.0051,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.0,
"Reasoning": 0.0
}
},
"Meta": {
"Model Name": "mistralai/Mistral-7B-Instruct-v0.2",
"License": "apache-2.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 7.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.1003,
"Completeness": 0.0826,
"Conciseness": 0.0258,
"Helpfulness": 0.0597,
"Honesty": 0.0774,
"Harmlessness": 0.0966,
"3C3H Score": 0.0737
},
"Tasks Scores": {
"Question Answering (QA)": 0.0431,
"Orthographic and Grammatical Analysis": 0.0,
"Safety": 0.1646,
"Reasoning": 0.1405
}
},
"Meta": {
"Model Name": "mistralai/Mistral-7B-Instruct-v0.3",
"License": "apache-2.0",
"Revision": "main",
"Precision": "bfloat16",
"Params": 7.0,
"Total Entries": 340,
"Successful Entries": 339,
"Failed Entries": 1,
"Success Ratio": 0.9971
}
},
{
"claude-3.5-sonnet Scores": {
"3C3H Scores": {
"Correctness": 0.501,
"Completeness": 0.4794,
"Conciseness": 0.2424,
"Helpfulness": 0.4797,
"Honesty": 0.4875,
"Harmlessness": 0.501,
"3C3H Score": 0.4485
},
"Tasks Scores": {
"Question Answering (QA)": 0.3437,
"Orthographic and Grammatical Analysis": 0.0514,
"Safety": 0.7979,
"Reasoning": 0.7185
}
},
"Meta": {
"Model Name": "mistralai/Mistral-Large-Instruct-2411",
"License": "mrl",
"Revision": "main",
"Precision": "bfloat16",
"Params": 123.0,
"Total Entries": 340,
"Successful Entries": 340,
"Failed Entries": 0,
"Success Ratio": 1.0
}
},
{
"_last_sync_timestamp": "2025-03-23T12:44:33.422103"
}
]