[ { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4882, "Completeness": 0.4755, "Conciseness": 0.1973, "Helpfulness": 0.4659, "Honesty": 0.4711, "Harmlessness": 0.4875, "3C3H Score": 0.4309 }, "Tasks Scores": { "Question Answering (QA)": 0.2919, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.7292, "Reasoning": 0.8423 } }, "Meta": { "Model Name": "Qwen/Qwen2.5-72B-Instruct", "License": "qwen", "Revision": "main", "Precision": "bfloat16", "Params": 72.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4892, "Completeness": 0.4451, "Conciseness": 0.324, "Helpfulness": 0.4667, "Honesty": 0.4738, "Harmlessness": 0.4885, "3C3H Score": 0.4479 }, "Tasks Scores": { "Question Answering (QA)": 0.2968, "Orthographic and Grammatical Analysis": 0.0958, "Safety": 0.951, "Reasoning": 0.7429 } }, "Meta": { "Model Name": "claude-3-5-haiku-20241022", "License": "Proprietary", "Revision": "UNK", "Precision": "UNK", "Params": "UNK", "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.6049, "Completeness": 0.5667, "Conciseness": 0.3914, "Helpfulness": 0.586, "Honesty": 0.585, "Harmlessness": 0.602, "3C3H Score": 0.556 }, "Tasks Scores": { "Question Answering (QA)": 0.4152, "Orthographic and Grammatical Analysis": 0.3625, "Safety": 0.9687, "Reasoning": 0.8054 } }, "Meta": { "Model Name": "claude-3-5-sonnet-20241022", "License": "Proprietary", "Revision": "UNK", "Precision": "UNK", "Params": "UNK", "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.6225, "Completeness": 0.5853, "Conciseness": 0.3449, "Helpfulness": 0.6039, "Honesty": 0.614, "Harmlessness": 0.6218, "3C3H Score": 0.5654 }, "Tasks Scores": { "Question Answering (QA)": 0.4179, "Orthographic and Grammatical Analysis": 0.4042, "Safety": 0.8698, "Reasoning": 0.8821 } }, "Meta": { "Model Name": "claude-3-7-sonnet-20250219", "License": "Proprietary", "Revision": "UNK", "Precision": "UNK", "Params": "UNK", "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.5755, "Completeness": 0.5392, "Conciseness": 0.2561, "Helpfulness": 0.5495, "Honesty": 0.5642, "Harmlessness": 0.5755, "3C3H Score": 0.51 }, "Tasks Scores": { "Question Answering (QA)": 0.4041, "Orthographic and Grammatical Analysis": 0.1833, "Safety": 0.7, "Reasoning": 0.8441 } }, "Meta": { "Model Name": "deepseek-chat", "License": "Proprietary", "Revision": "UNK", "Precision": "UNK", "Params": "UNK", "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.6314, "Completeness": 0.5667, "Conciseness": 0.3995, "Helpfulness": 0.5966, "Honesty": 0.6179, "Harmlessness": 0.6306, "3C3H Score": 0.5738 }, "Tasks Scores": { "Question Answering (QA)": 0.4704, "Orthographic and Grammatical Analysis": 0.2306, "Safety": 0.9021, "Reasoning": 0.8286 } }, "Meta": { "Model Name": "gpt-4o-2024-08-06", "License": "Proprietary", "Revision": "UNK", "Precision": "UNK", "Params": "UNK", "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.451, "Completeness": 0.4088, "Conciseness": 0.276, "Helpfulness": 0.4206, "Honesty": 0.4358, "Harmlessness": 0.4451, "3C3H Score": 0.4062 }, "Tasks Scores": { "Question Answering (QA)": 0.2562, "Orthographic and Grammatical Analysis": 0.0361, "Safety": 0.8677, "Reasoning": 0.7298 } }, "Meta": { "Model Name": "gpt-4o-mini-2024-07-18", "License": "Proprietary", "Revision": "UNK", "Precision": "UNK", "Params": "UNK", "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.7588, "Completeness": 0.7098, "Conciseness": 0.5125, "Helpfulness": 0.7255, "Honesty": 0.7525, "Harmlessness": 0.7559, "3C3H Score": 0.7025 }, "Tasks Scores": { "Question Answering (QA)": 0.6051, "Orthographic and Grammatical Analysis": 0.4528, "Safety": 0.9437, "Reasoning": 0.95 } }, "Meta": { "Model Name": "o1-2024-12-17", "License": "Proprietary", "Revision": "UNK", "Precision": "UNK", "Params": "UNK", "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4755, "Completeness": 0.4676, "Conciseness": 0.2804, "Helpfulness": 0.4627, "Honesty": 0.4667, "Harmlessness": 0.474, "3C3H Score": 0.4378 }, "Tasks Scores": { "Question Answering (QA)": 0.2435, "Orthographic and Grammatical Analysis": 0.0292, "Safety": 0.8958, "Reasoning": 0.9065 } }, "Meta": { "Model Name": "o1-mini-2024-09-12", "License": "Proprietary", "Revision": "UNK", "Precision": "UNK", "Params": "UNK", "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.5608, "Completeness": 0.5235, "Conciseness": 0.3672, "Helpfulness": 0.5353, "Honesty": 0.551, "Harmlessness": 0.56, "3C3H Score": 0.5163 }, "Tasks Scores": { "Question Answering (QA)": 0.3458, "Orthographic and Grammatical Analysis": 0.0875, "Safety": 0.9448, "Reasoning": 0.9423 } }, "Meta": { "Model Name": "o3-mini-2025-01-31", "License": "Proprietary", "Revision": "UNK", "Precision": "UNK", "Params": "UNK", "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3088, "Completeness": 0.2461, "Conciseness": 0.1998, "Helpfulness": 0.2674, "Honesty": 0.2956, "Harmlessness": 0.3081, "3C3H Score": 0.271 }, "Tasks Scores": { "Question Answering (QA)": 0.1979, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.7854, "Reasoning": 0.3018 } }, "Meta": { "Model Name": "Mohaddz/Thinking-Camel-7b", "License": "Open", "Revision": "main", "Precision": "float16", "Params": 7.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3735, "Completeness": 0.3539, "Conciseness": 0.1699, "Helpfulness": 0.3554, "Honesty": 0.3625, "Harmlessness": 0.3735, "3C3H Score": 0.3315 }, "Tasks Scores": { "Question Answering (QA)": 0.1528, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.7521, "Reasoning": 0.7435 } }, "Meta": { "Model Name": "1024m/PHI-4-Hindi-4bit", "License": "Open", "Revision": "main", "Precision": "4bit", "Params": 14.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3147, "Completeness": 0.2529, "Conciseness": 0.2027, "Helpfulness": 0.2713, "Honesty": 0.2988, "Harmlessness": 0.3088, "3C3H Score": 0.2749 }, "Tasks Scores": { "Question Answering (QA)": 0.1996, "Orthographic and Grammatical Analysis": 0.0056, "Safety": 0.7625, "Reasoning": 0.3268 } }, "Meta": { "Model Name": "ALLaM-AI/ALLaM-7B-Instruct-preview", "License": "apache-2.0", "Revision": "main", "Precision": "bfloat16", "Params": 7.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.2451, "Completeness": 0.2059, "Conciseness": 0.1282, "Helpfulness": 0.2088, "Honesty": 0.2375, "Harmlessness": 0.2436, "3C3H Score": 0.2115 }, "Tasks Scores": { "Question Answering (QA)": 0.1927, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.4146, "Reasoning": 0.2399 } }, "Meta": { "Model Name": "CohereForAI/aya-23-35B", "License": "cc-by-nc-4.0", "Revision": "main", "Precision": "float16", "Params": 35.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.1765, "Completeness": 0.1461, "Conciseness": 0.0929, "Helpfulness": 0.1502, "Honesty": 0.1725, "Harmlessness": 0.1757, "3C3H Score": 0.1523 }, "Tasks Scores": { "Question Answering (QA)": 0.1296, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.4844, "Reasoning": 0.0929 } }, "Meta": { "Model Name": "CohereForAI/aya-23-8B", "License": "cc-by-nc-4.0", "Revision": "main", "Precision": "float16", "Params": 8.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3795, "Completeness": 0.3618, "Conciseness": 0.1401, "Helpfulness": 0.3545, "Honesty": 0.3582, "Harmlessness": 0.3744, "3C3H Score": 0.3281 }, "Tasks Scores": { "Question Answering (QA)": 0.2394, "Orthographic and Grammatical Analysis": 0.0556, "Safety": 0.6823, "Reasoning": 0.4946 } }, "Meta": { "Model Name": "CohereForAI/aya-expanse-32b", "License": "cc-by-nc-4.0", "Revision": "main", "Precision": "float16", "Params": 32.0, "Total Entries": 340, "Successful Entries": 339, "Failed Entries": 1, "Success Ratio": 0.9971 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3029, "Completeness": 0.2882, "Conciseness": 0.1022, "Helpfulness": 0.2841, "Honesty": 0.2902, "Harmlessness": 0.3015, "3C3H Score": 0.2615 }, "Tasks Scores": { "Question Answering (QA)": 0.174, "Orthographic and Grammatical Analysis": 0.0319, "Safety": 0.6531, "Reasoning": 0.3863 } }, "Meta": { "Model Name": "CohereForAI/aya-expanse-8b", "License": "cc-by-nc-4.0", "Revision": "main", "Precision": "float16", "Params": 8.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.5412, "Completeness": 0.5275, "Conciseness": 0.2047, "Helpfulness": 0.5284, "Honesty": 0.5287, "Harmlessness": 0.5397, "3C3H Score": 0.4783 }, "Tasks Scores": { "Question Answering (QA)": 0.3701, "Orthographic and Grammatical Analysis": 0.1444, "Safety": 0.7604, "Reasoning": 0.7696 } }, "Meta": { "Model Name": "CohereForAI/c4ai-command-a-03-2025", "License": "cc-by-nc-4.0", "Revision": "main", "Precision": "bfloat16", "Params": 111.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3235, "Completeness": 0.2742, "Conciseness": 0.162, "Helpfulness": 0.2818, "Honesty": 0.3119, "Harmlessness": 0.3235, "3C3H Score": 0.2795 }, "Tasks Scores": { "Question Answering (QA)": 0.2439, "Orthographic and Grammatical Analysis": 0.0333, "Safety": 0.4042, "Reasoning": 0.4143 } }, "Meta": { "Model Name": "CohereForAI/c4ai-command-r-08-2024", "License": "cc-by-nc-4.0", "Revision": "main", "Precision": "float16", "Params": 32.0, "Total Entries": 340, "Successful Entries": 338, "Failed Entries": 2, "Success Ratio": 0.9941 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3529, "Completeness": 0.3137, "Conciseness": 0.1652, "Helpfulness": 0.3069, "Honesty": 0.3363, "Harmlessness": 0.3485, "3C3H Score": 0.3039 }, "Tasks Scores": { "Question Answering (QA)": 0.2773, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.3646, "Reasoning": 0.4756 } }, "Meta": { "Model Name": "CohereForAI/c4ai-command-r-plus-08-2024", "License": "cc-by-nc-4.0", "Revision": "main", "Precision": "float16", "Params": 104.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3667, "Completeness": 0.302, "Conciseness": 0.1968, "Helpfulness": 0.3132, "Honesty": 0.3559, "Harmlessness": 0.3667, "3C3H Score": 0.3169 }, "Tasks Scores": { "Question Answering (QA)": 0.2866, "Orthographic and Grammatical Analysis": 0.0639, "Safety": 0.6469, "Reasoning": 0.3232 } }, "Meta": { "Model Name": "CohereForAI/c4ai-command-r-plus", "License": "cc-by-nc-4.0", "Revision": "main", "Precision": "float16", "Params": 104.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.2517, "Completeness": 0.2104, "Conciseness": 0.115, "Helpfulness": 0.2099, "Honesty": 0.237, "Harmlessness": 0.2495, "3C3H Score": 0.2123 }, "Tasks Scores": { "Question Answering (QA)": 0.2255, "Orthographic and Grammatical Analysis": 0.0333, "Safety": 0.2937, "Reasoning": 0.2048 } }, "Meta": { "Model Name": "CohereForAI/c4ai-command-r-v01", "License": "cc-by-nc-4.0", "Revision": "main", "Precision": "float16", "Params": 35.0, "Total Entries": 340, "Successful Entries": 339, "Failed Entries": 1, "Success Ratio": 0.9971 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4569, "Completeness": 0.452, "Conciseness": 0.1904, "Helpfulness": 0.4365, "Honesty": 0.4373, "Harmlessness": 0.4554, "3C3H Score": 0.4047 }, "Tasks Scores": { "Question Answering (QA)": 0.2712, "Orthographic and Grammatical Analysis": 0.0278, "Safety": 0.8031, "Reasoning": 0.7202 } }, "Meta": { "Model Name": "MaziyarPanahi/calme-2.1-qwen2.5-72b", "License": "tongyi-qianwen", "Revision": "main", "Precision": "bfloat16", "Params": 72.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4745, "Completeness": 0.4716, "Conciseness": 0.2025, "Helpfulness": 0.4603, "Honesty": 0.4581, "Harmlessness": 0.4745, "3C3H Score": 0.4236 }, "Tasks Scores": { "Question Answering (QA)": 0.2809, "Orthographic and Grammatical Analysis": 0.0542, "Safety": 0.8011, "Reasoning": 0.7738 } }, "Meta": { "Model Name": "MaziyarPanahi/calme-2.2-qwen2.5-72b", "License": "tongyi-qianwen", "Revision": "main", "Precision": "bfloat16", "Params": 72.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3108, "Completeness": 0.2471, "Conciseness": 0.2005, "Helpfulness": 0.2672, "Honesty": 0.299, "Harmlessness": 0.31, "3C3H Score": 0.2724 }, "Tasks Scores": { "Question Answering (QA)": 0.2002, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.7865, "Reasoning": 0.3018 } }, "Meta": { "Model Name": "Mohaddz/Thinking-cow-7B", "License": "Apache license 2.0", "Revision": "main", "Precision": "float16", "Params": 7.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3275, "Completeness": 0.2284, "Conciseness": 0.2463, "Helpfulness": 0.2613, "Honesty": 0.3159, "Harmlessness": 0.3275, "3C3H Score": 0.2845 }, "Tasks Scores": { "Question Answering (QA)": 0.2005, "Orthographic and Grammatical Analysis": 0.0444, "Safety": 0.8302, "Reasoning": 0.3155 } }, "Meta": { "Model Name": "Navid-AI/Yehia-7B-preview", "License": "Open", "Revision": "main", "Precision": "bfloat16", "Params": 6.524, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.2301, "Completeness": 0.2173, "Conciseness": 0.0376, "Helpfulness": 0.1323, "Honesty": 0.2117, "Harmlessness": 0.2107, "3C3H Score": 0.1733 }, "Tasks Scores": { "Question Answering (QA)": 0.0706, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.5365, "Reasoning": 0.3358 } }, "Meta": { "Model Name": "Qwen/QwQ-32B-Preview", "License": "apache-2.0", "Revision": "main", "Precision": "bfloat16", "Params": 32.0, "Total Entries": 340, "Successful Entries": 339, "Failed Entries": 1, "Success Ratio": 0.9971 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3088, "Completeness": 0.3069, "Conciseness": 0.0137, "Helpfulness": 0.223, "Honesty": 0.2953, "Harmlessness": 0.3074, "3C3H Score": 0.2425 }, "Tasks Scores": { "Question Answering (QA)": 0.149, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.1906, "Reasoning": 0.6435 } }, "Meta": { "Model Name": "Qwen/QwQ-32B", "License": "apache-2.0", "Revision": "main", "Precision": "bfloat16", "Params": 32.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.0944, "Completeness": 0.0855, "Conciseness": 0.0339, "Helpfulness": 0.0723, "Honesty": 0.0819, "Harmlessness": 0.0878, "3C3H Score": 0.076 }, "Tasks Scores": { "Question Answering (QA)": 0.0469, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.399, "Reasoning": 0.0065 } }, "Meta": { "Model Name": "Qwen/Qwen2.5-0.5B-Instruct", "License": "apache-2.0", "Revision": "main", "Precision": "bfloat16", "Params": 0.465, "Total Entries": 340, "Successful Entries": 339, "Failed Entries": 1, "Success Ratio": 0.9971 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.1882, "Completeness": 0.1882, "Conciseness": 0.1096, "Helpfulness": 0.1596, "Honesty": 0.1846, "Harmlessness": 0.1846, "3C3H Score": 0.1691 }, "Tasks Scores": { "Question Answering (QA)": 0.0465, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.6979, "Reasoning": 0.2899 } }, "Meta": { "Model Name": "Qwen/Qwen2.5-1.5B-Instruct", "License": "qwen", "Revision": "main", "Precision": "bfloat16", "Params": 1.443, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3833, "Completeness": 0.3647, "Conciseness": 0.1978, "Helpfulness": 0.3652, "Honesty": 0.376, "Harmlessness": 0.3826, "3C3H Score": 0.3449 }, "Tasks Scores": { "Question Answering (QA)": 0.1585, "Orthographic and Grammatical Analysis": 0.0306, "Safety": 0.8281, "Reasoning": 0.7363 } }, "Meta": { "Model Name": "Qwen/Qwen2.5-14B-Instruct", "License": "apache-2.0", "Revision": "main", "Precision": "bfloat16", "Params": 14.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4235, "Completeness": 0.3922, "Conciseness": 0.2162, "Helpfulness": 0.3971, "Honesty": 0.4132, "Harmlessness": 0.4223, "3C3H Score": 0.3774 }, "Tasks Scores": { "Question Answering (QA)": 0.2031, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.8188, "Reasoning": 0.7851 } }, "Meta": { "Model Name": "Qwen/Qwen2.5-32B-Instruct", "License": "apache-2.0", "Revision": "main", "Precision": "bfloat16", "Params": 32.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.2598, "Completeness": 0.2598, "Conciseness": 0.1304, "Helpfulness": 0.2431, "Honesty": 0.2559, "Harmlessness": 0.2561, "3C3H Score": 0.2342 }, "Tasks Scores": { "Question Answering (QA)": 0.0665, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.8646, "Reasoning": 0.4536 } }, "Meta": { "Model Name": "Qwen/Qwen2.5-3B-Instruct", "License": "apache-2.0", "Revision": "main", "Precision": "bfloat16", "Params": 3.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3304, "Completeness": 0.2832, "Conciseness": 0.1927, "Helpfulness": 0.2898, "Honesty": 0.3142, "Harmlessness": 0.3267, "3C3H Score": 0.2895 }, "Tasks Scores": { "Question Answering (QA)": 0.2124, "Orthographic and Grammatical Analysis": 0.0194, "Safety": 0.8448, "Reasoning": 0.3071 } }, "Meta": { "Model Name": "inceptionai/jais-adapted-13b-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 13.0, "Total Entries": 340, "Successful Entries": 339, "Failed Entries": 1, "Success Ratio": 0.9971 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4206, "Completeness": 0.3716, "Conciseness": 0.1875, "Helpfulness": 0.3752, "Honesty": 0.3912, "Harmlessness": 0.4199, "3C3H Score": 0.361 }, "Tasks Scores": { "Question Answering (QA)": 0.2878, "Orthographic and Grammatical Analysis": 0.0306, "Safety": 0.8188, "Reasoning": 0.45 } }, "Meta": { "Model Name": "inceptionai/jais-adapted-70b-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 70.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.2627, "Completeness": 0.2392, "Conciseness": 0.1206, "Helpfulness": 0.2424, "Honesty": 0.2468, "Harmlessness": 0.2627, "3C3H Score": 0.2291 }, "Tasks Scores": { "Question Answering (QA)": 0.1511, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.7479, "Reasoning": 0.2536 } }, "Meta": { "Model Name": "inceptionai/jais-family-13b-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 13.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.2108, "Completeness": 0.1971, "Conciseness": 0.077, "Helpfulness": 0.1828, "Honesty": 0.189, "Harmlessness": 0.2064, "3C3H Score": 0.1772 }, "Tasks Scores": { "Question Answering (QA)": 0.111, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.7052, "Reasoning": 0.1405 } }, "Meta": { "Model Name": "inceptionai/jais-family-2p7b-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 3.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3048, "Completeness": 0.2793, "Conciseness": 0.1362, "Helpfulness": 0.2778, "Honesty": 0.282, "Harmlessness": 0.3041, "3C3H Score": 0.264 }, "Tasks Scores": { "Question Answering (QA)": 0.1863, "Orthographic and Grammatical Analysis": 0.0222, "Safety": 0.7521, "Reasoning": 0.3095 } }, "Meta": { "Model Name": "inceptionai/jais-family-30b-16k-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 30.0, "Total Entries": 340, "Successful Entries": 339, "Failed Entries": 1, "Success Ratio": 0.9971 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.2784, "Completeness": 0.2569, "Conciseness": 0.1275, "Helpfulness": 0.2485, "Honesty": 0.2632, "Harmlessness": 0.2755, "3C3H Score": 0.2417 }, "Tasks Scores": { "Question Answering (QA)": 0.1665, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.7177, "Reasoning": 0.2881 } }, "Meta": { "Model Name": "inceptionai/jais-family-30b-8k-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 30.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.0725, "Completeness": 0.0637, "Conciseness": 0.0228, "Helpfulness": 0.0483, "Honesty": 0.0556, "Harmlessness": 0.0713, "3C3H Score": 0.0557 }, "Tasks Scores": { "Question Answering (QA)": 0.046, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.174, "Reasoning": 0.0399 } }, "Meta": { "Model Name": "inceptionai/jais-family-590m-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 0.719, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.2275, "Completeness": 0.1961, "Conciseness": 0.0995, "Helpfulness": 0.2029, "Honesty": 0.2078, "Harmlessness": 0.2238, "3C3H Score": 0.1929 }, "Tasks Scores": { "Question Answering (QA)": 0.1413, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.6208, "Reasoning": 0.1786 } }, "Meta": { "Model Name": "inceptionai/jais-family-6p7b-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 7.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.0029, "Completeness": 0.0029, "Conciseness": 0.0, "Helpfulness": 0.0007, "Honesty": 0.0029, "Harmlessness": 0.0029, "3C3H Score": 0.0021 }, "Tasks Scores": { "Question Answering (QA)": 0.0035, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.0, "Reasoning": 0.0 } }, "Meta": { "Model Name": "kyutai/helium-1-preview-2b", "License": "cc-by-4.0", "Revision": "main", "Precision": "bfloat16", "Params": 2.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4029, "Completeness": 0.3804, "Conciseness": 0.1877, "Helpfulness": 0.3748, "Honesty": 0.3882, "Harmlessness": 0.3983, "3C3H Score": 0.3554 }, "Tasks Scores": { "Question Answering (QA)": 0.1775, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.7729, "Reasoning": 0.7774 } }, "Meta": { "Model Name": "maldv/Qwentile2.5-32B-Instruct", "License": "Open", "Revision": "main", "Precision": "float16", "Params": 32.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3598, "Completeness": 0.3029, "Conciseness": 0.2534, "Helpfulness": 0.3287, "Honesty": 0.3495, "Harmlessness": 0.3588, "3C3H Score": 0.3255 }, "Tasks Scores": { "Question Answering (QA)": 0.2192, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.8729, "Reasoning": 0.456 } }, "Meta": { "Model Name": "gpt-3.5-turbo-0125", "License": "Proprietary", "Revision": "UNK", "Precision": "UNK", "Params": "UNK", "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4876, "Completeness": 0.4748, "Conciseness": 0.202, "Helpfulness": 0.4696, "Honesty": 0.4716, "Harmlessness": 0.4874, "3C3H Score": 0.4322 }, "Tasks Scores": { "Question Answering (QA)": 0.2962, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.75, "Reasoning": 0.8185 } }, "Meta": { "Model Name": "rombodawg/Rombos-LLM-V2.5-Qwen-72b", "License": "qwen", "Revision": "main", "Precision": "bfloat16", "Params": 72.0, "Total Entries": 340, "Successful Entries": 337, "Failed Entries": 3, "Success Ratio": 0.9912 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.2029, "Completeness": 0.1882, "Conciseness": 0.1096, "Helpfulness": 0.1772, "Honesty": 0.1941, "Harmlessness": 0.2007, "3C3H Score": 0.1788 }, "Tasks Scores": { "Question Answering (QA)": 0.0802, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.7886, "Reasoning": 0.1887 } }, "Meta": { "Model Name": "silma-ai/SILMA-Kashif-2B-Instruct-v1.0", "License": "Gemma", "Revision": "main", "Precision": "bfloat16", "Params": 2.453, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.1082, "Completeness": 0.0442, "Conciseness": 0.0039, "Helpfulness": 0.0263, "Honesty": 0.0624, "Harmlessness": 0.101, "3C3H Score": 0.0577 }, "Tasks Scores": { "Question Answering (QA)": 0.0882, "Orthographic and Grammatical Analysis": 0.0125, "Safety": 0.0, "Reasoning": 0.022 } }, "Meta": { "Model Name": "stabilityai/ar-stablelm-2-chat", "License": "other", "Revision": "main", "Precision": "float32", "Params": 2.0, "Total Entries": 340, "Successful Entries": 339, "Failed Entries": 1, "Success Ratio": 0.9971 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3431, "Completeness": 0.2892, "Conciseness": 0.1588, "Helpfulness": 0.288, "Honesty": 0.3208, "Harmlessness": 0.3431, "3C3H Score": 0.2905 }, "Tasks Scores": { "Question Answering (QA)": 0.2097, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.8677, "Reasoning": 0.3161 } }, "Meta": { "Model Name": "utter-project/EuroLLM-9B-Instruct", "License": "apache-2.0", "Revision": "main", "Precision": "bfloat16", "Params": 9.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.2363, "Completeness": 0.2255, "Conciseness": 0.1157, "Helpfulness": 0.2238, "Honesty": 0.2299, "Harmlessness": 0.2363, "3C3H Score": 0.2112 }, "Tasks Scores": { "Question Answering (QA)": 0.1266, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.4261, "Reasoning": 0.4208 } }, "Meta": { "Model Name": "CohereForAI/c4ai-command-r7b-12-2024", "License": "cc-by-nc-4.0", "Revision": "main", "Precision": "bfloat16", "Params": 8.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3206, "Completeness": 0.3147, "Conciseness": 0.1387, "Helpfulness": 0.3103, "Honesty": 0.3096, "Harmlessness": 0.3199, "3C3H Score": 0.2856 }, "Tasks Scores": { "Question Answering (QA)": 0.1514, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.6552, "Reasoning": 0.5804 } }, "Meta": { "Model Name": "CohereForAI/c4ai-command-r7b-arabic-02-2025", "License": "cc-by-nc-4.0", "Revision": "main", "Precision": "bfloat16", "Params": 8.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.1765, "Completeness": 0.0931, "Conciseness": 0.1333, "Helpfulness": 0.1201, "Honesty": 0.1681, "Harmlessness": 0.175, "3C3H Score": 0.1444 }, "Tasks Scores": { "Question Answering (QA)": 0.1533, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.3083, "Reasoning": 0.0869 } }, "Meta": { "Model Name": "FreedomIntelligence/AceGPT-v1.5-13B-Chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 13.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3598, "Completeness": 0.2961, "Conciseness": 0.2625, "Helpfulness": 0.3208, "Honesty": 0.3532, "Harmlessness": 0.3591, "3C3H Score": 0.3252 }, "Tasks Scores": { "Question Answering (QA)": 0.1946, "Orthographic and Grammatical Analysis": 0.0333, "Safety": 0.9083, "Reasoning": 0.4905 } }, "Meta": { "Model Name": "FreedomIntelligence/AceGPT-v2-32B-Chat", "License": "apache-2.0", "Revision": "main", "Precision": "float16", "Params": 32.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4343, "Completeness": 0.3235, "Conciseness": 0.3216, "Helpfulness": 0.3755, "Honesty": 0.424, "Harmlessness": 0.4336, "3C3H Score": 0.3854 }, "Tasks Scores": { "Question Answering (QA)": 0.3131, "Orthographic and Grammatical Analysis": 0.025, "Safety": 0.8875, "Reasoning": 0.4595 } }, "Meta": { "Model Name": "FreedomIntelligence/AceGPT-v2-70B-Chat", "License": "apache-2.0", "Revision": "main", "Precision": "float16", "Params": 70.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3275, "Completeness": 0.3108, "Conciseness": 0.1395, "Helpfulness": 0.3081, "Honesty": 0.3174, "Harmlessness": 0.326, "3C3H Score": 0.2882 }, "Tasks Scores": { "Question Answering (QA)": 0.1199, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.7729, "Reasoning": 0.6155 } }, "Meta": { "Model Name": "Qwen/Qwen2.5-7B-Instruct", "License": "apache-2.0", "Revision": "main", "Precision": "bfloat16", "Params": 7.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4098, "Completeness": 0.3539, "Conciseness": 0.2368, "Helpfulness": 0.3792, "Honesty": 0.3887, "Harmlessness": 0.4098, "3C3H Score": 0.363 }, "Tasks Scores": { "Question Answering (QA)": 0.2707, "Orthographic and Grammatical Analysis": 0.0514, "Safety": 0.8927, "Reasoning": 0.4577 } }, "Meta": { "Model Name": "claude-3-haiku-20240307", "License": "Proprietary", "Revision": "UNK", "Precision": "UNK", "Params": "UNK", "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3931, "Completeness": 0.3765, "Conciseness": 0.211, "Helpfulness": 0.377, "Honesty": 0.3843, "Harmlessness": 0.3931, "3C3H Score": 0.3558 }, "Tasks Scores": { "Question Answering (QA)": 0.2201, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.8865, "Reasoning": 0.5929 } }, "Meta": { "Model Name": "google/gemma-2-27b-it", "License": "gemma", "Revision": "main", "Precision": "bfloat16", "Params": 27.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3343, "Completeness": 0.3196, "Conciseness": 0.1861, "Helpfulness": 0.323, "Honesty": 0.3294, "Harmlessness": 0.3336, "3C3H Score": 0.3043 }, "Tasks Scores": { "Question Answering (QA)": 0.1633, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.8875, "Reasoning": 0.5072 } }, "Meta": { "Model Name": "google/gemma-2-9b-it", "License": "gemma", "Revision": "main", "Precision": "bfloat16", "Params": 9.0, "Total Entries": 340, "Successful Entries": 339, "Failed Entries": 1, "Success Ratio": 0.9971 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4888, "Completeness": 0.4792, "Conciseness": 0.1976, "Helpfulness": 0.4662, "Honesty": 0.4702, "Harmlessness": 0.488, "3C3H Score": 0.4317 }, "Tasks Scores": { "Question Answering (QA)": 0.2443, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.7927, "Reasoning": 0.8 } }, "Meta": { "Model Name": "google/gemma-3-12b-it", "License": "gemma", "Revision": "main", "Precision": "bfloat16", "Params": 12.0, "Total Entries": 340, "Successful Entries": 313, "Failed Entries": 27, "Success Ratio": 0.9206 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.2101, "Completeness": 0.2041, "Conciseness": 0.0466, "Helpfulness": 0.1834, "Honesty": 0.1997, "Harmlessness": 0.2034, "3C3H Score": 0.1746 }, "Tasks Scores": { "Question Answering (QA)": 0.0694, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.7292, "Reasoning": 0.2298 } }, "Meta": { "Model Name": "google/gemma-3-1b-it", "License": "gemma", "Revision": "main", "Precision": "bfloat16", "Params": 1.0, "Total Entries": 340, "Successful Entries": 338, "Failed Entries": 2, "Success Ratio": 0.9941 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.5231, "Completeness": 0.5064, "Conciseness": 0.1868, "Helpfulness": 0.4939, "Honesty": 0.5044, "Harmlessness": 0.5172, "3C3H Score": 0.4553 }, "Tasks Scores": { "Question Answering (QA)": 0.3213, "Orthographic and Grammatical Analysis": 0.0292, "Safety": 0.7724, "Reasoning": 0.8441 } }, "Meta": { "Model Name": "google/gemma-3-27b-it", "License": "gemma", "Revision": "main", "Precision": "bfloat16", "Params": 27.0, "Total Entries": 340, "Successful Entries": 339, "Failed Entries": 1, "Success Ratio": 0.9971 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3392, "Completeness": 0.3363, "Conciseness": 0.1088, "Helpfulness": 0.3186, "Honesty": 0.3316, "Harmlessness": 0.337, "3C3H Score": 0.2953 }, "Tasks Scores": { "Question Answering (QA)": 0.1067, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.8229, "Reasoning": 0.6589 } }, "Meta": { "Model Name": "google/gemma-3-4b-it", "License": "gemma", "Revision": "main", "Precision": "bfloat16", "Params": 4.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.1667, "Completeness": 0.1627, "Conciseness": 0.0603, "Helpfulness": 0.1392, "Honesty": 0.1439, "Harmlessness": 0.1615, "3C3H Score": 0.1391 }, "Tasks Scores": { "Question Answering (QA)": 0.0885, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.3938, "Reasoning": 0.1976 } }, "Meta": { "Model Name": "inceptionai/jais-family-1p3b-chat", "License": "apache-2.0", "Revision": "main", "Precision": "float32", "Params": 1.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.3931, "Completeness": 0.3441, "Conciseness": 0.2596, "Helpfulness": 0.361, "Honesty": 0.3784, "Harmlessness": 0.3895, "3C3H Score": 0.3543 }, "Tasks Scores": { "Question Answering (QA)": 0.2044, "Orthographic and Grammatical Analysis": 0.0333, "Safety": 0.8719, "Reasoning": 0.6244 } }, "Meta": { "Model Name": "malhajar/Shahin-v0.1", "License": "Open", "Revision": "main", "Precision": "float16", "Params": 27.519, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.4225, "Completeness": 0.3569, "Conciseness": 0.3252, "Helpfulness": 0.3777, "Honesty": 0.4147, "Harmlessness": 0.4218, "3C3H Score": 0.3865 }, "Tasks Scores": { "Question Answering (QA)": 0.2353, "Orthographic and Grammatical Analysis": 0.025, "Safety": 0.8542, "Reasoning": 0.706 } }, "Meta": { "Model Name": "meta-llama/Llama-3.1-70B-Instruct", "License": "llama3.1", "Revision": "main", "Precision": "bfloat16", "Params": 70.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.2971, "Completeness": 0.2686, "Conciseness": 0.1968, "Helpfulness": 0.261, "Honesty": 0.2814, "Harmlessness": 0.2971, "3C3H Score": 0.267 }, "Tasks Scores": { "Question Answering (QA)": 0.1176, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.8792, "Reasoning": 0.4583 } }, "Meta": { "Model Name": "meta-llama/Llama-3.1-8B-Instruct", "License": "llama3.1", "Revision": "main", "Precision": "bfloat16", "Params": 8.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.1353, "Completeness": 0.1176, "Conciseness": 0.0875, "Helpfulness": 0.1007, "Honesty": 0.1213, "Harmlessness": 0.1301, "3C3H Score": 0.1154 }, "Tasks Scores": { "Question Answering (QA)": 0.0479, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.5875, "Reasoning": 0.0881 } }, "Meta": { "Model Name": "meta-llama/Llama-3.2-1B-Instruct", "License": "llama3.2", "Revision": "main", "Precision": "bfloat16", "Params": 1.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.2468, "Completeness": 0.2271, "Conciseness": 0.1657, "Helpfulness": 0.204, "Honesty": 0.2335, "Harmlessness": 0.2424, "3C3H Score": 0.2199 }, "Tasks Scores": { "Question Answering (QA)": 0.0782, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.9021, "Reasoning": 0.3274 } }, "Meta": { "Model Name": "meta-llama/Llama-3.2-3B-Instruct", "License": "llama3.2", "Revision": "main", "Precision": "bfloat16", "Params": 3.0, "Total Entries": 340, "Successful Entries": 339, "Failed Entries": 1, "Success Ratio": 0.9971 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.448, "Completeness": 0.3725, "Conciseness": 0.3586, "Helpfulness": 0.3939, "Honesty": 0.4402, "Harmlessness": 0.4478, "3C3H Score": 0.4102 }, "Tasks Scores": { "Question Answering (QA)": 0.2719, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.8792, "Reasoning": 0.7131 } }, "Meta": { "Model Name": "meta-llama/Llama-3.3-70B-Instruct", "License": "llama3.3", "Revision": "main", "Precision": "bfloat16", "Params": 70.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.0686, "Completeness": 0.0657, "Conciseness": 0.036, "Helpfulness": 0.0615, "Honesty": 0.0662, "Harmlessness": 0.0684, "3C3H Score": 0.0611 }, "Tasks Scores": { "Question Answering (QA)": 0.044, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.0, "Reasoning": 0.1708 } }, "Meta": { "Model Name": "meta-llama/Meta-Llama-3-70B-Instruct", "License": "llama3", "Revision": "main", "Precision": "bfloat16", "Params": 70.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.0294, "Completeness": 0.0294, "Conciseness": 0.0127, "Helpfulness": 0.026, "Honesty": 0.0272, "Harmlessness": 0.0294, "3C3H Score": 0.0257 }, "Tasks Scores": { "Question Answering (QA)": 0.0299, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.0, "Reasoning": 0.0393 } }, "Meta": { "Model Name": "meta-llama/Meta-Llama-3-8B-Instruct", "License": "llama3", "Revision": "main", "Precision": "bfloat16", "Params": 14.963, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.2667, "Completeness": 0.2549, "Conciseness": 0.1257, "Helpfulness": 0.2368, "Honesty": 0.2507, "Harmlessness": 0.2659, "3C3H Score": 0.2335 }, "Tasks Scores": { "Question Answering (QA)": 0.1294, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.5042, "Reasoning": 0.4762 } }, "Meta": { "Model Name": "mistralai/Ministral-8B-Instruct-2410", "License": "mrl", "Revision": "main", "Precision": "bfloat16", "Params": 8.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.0039, "Completeness": 0.0039, "Conciseness": 0.0007, "Helpfulness": 0.0022, "Honesty": 0.0032, "Harmlessness": 0.0039, "3C3H Score": 0.003 }, "Tasks Scores": { "Question Answering (QA)": 0.0051, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.0, "Reasoning": 0.0 } }, "Meta": { "Model Name": "mistralai/Mistral-7B-Instruct-v0.2", "License": "apache-2.0", "Revision": "main", "Precision": "bfloat16", "Params": 7.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.1003, "Completeness": 0.0826, "Conciseness": 0.0258, "Helpfulness": 0.0597, "Honesty": 0.0774, "Harmlessness": 0.0966, "3C3H Score": 0.0737 }, "Tasks Scores": { "Question Answering (QA)": 0.0431, "Orthographic and Grammatical Analysis": 0.0, "Safety": 0.1646, "Reasoning": 0.1405 } }, "Meta": { "Model Name": "mistralai/Mistral-7B-Instruct-v0.3", "License": "apache-2.0", "Revision": "main", "Precision": "bfloat16", "Params": 7.0, "Total Entries": 340, "Successful Entries": 339, "Failed Entries": 1, "Success Ratio": 0.9971 } }, { "claude-3.5-sonnet Scores": { "3C3H Scores": { "Correctness": 0.501, "Completeness": 0.4794, "Conciseness": 0.2424, "Helpfulness": 0.4797, "Honesty": 0.4875, "Harmlessness": 0.501, "3C3H Score": 0.4485 }, "Tasks Scores": { "Question Answering (QA)": 0.3437, "Orthographic and Grammatical Analysis": 0.0514, "Safety": 0.7979, "Reasoning": 0.7185 } }, "Meta": { "Model Name": "mistralai/Mistral-Large-Instruct-2411", "License": "mrl", "Revision": "main", "Precision": "bfloat16", "Params": 123.0, "Total Entries": 340, "Successful Entries": 340, "Failed Entries": 0, "Success Ratio": 1.0 } }, { "_last_sync_timestamp": "2025-03-23T12:44:33.422103" } ]