{
  "config_general": {
    "lighteval_sha": "?",
    "num_fewshot_seeds": 1,
    "max_samples": null,
    "job_id": 0,
    "start_time": 4012.162903884,
    "end_time": 4201.836127132,
    "total_evaluation_time_secondes": "189.67322324799989",
    "model_name": "HuggingFaceTB/SmolLM2-360M",
    "model_sha": "f8027fd0eaeea54caa13c31d31b9fdc459c38b49",
    "model_dtype": null,
    "model_size": "1.35 GB",
    "generation_parameters": {
      "early_stopping": null,
      "repetition_penalty": null,
      "frequency_penalty": null,
      "length_penalty": null,
      "presence_penalty": null,
      "max_new_tokens": null,
      "min_new_tokens": null,
      "seed": null,
      "stop_tokens": null,
      "temperature": null,
      "top_k": null,
      "min_p": null,
      "top_p": null,
      "truncate_prompt": null,
      "response_format": null
    }
  },
  "results": {
    "leaderboard|truthfulqa:mc|0": {
      "truthfulqa_mc1": 0.211750305997552,
      "truthfulqa_mc1_stderr": 0.014302068353925612,
      "truthfulqa_mc2": 0.33432310924938496,
      "truthfulqa_mc2_stderr": 0.013335337173043119
    },
    "all": {
      "truthfulqa_mc1": 0.211750305997552,
      "truthfulqa_mc1_stderr": 0.014302068353925612,
      "truthfulqa_mc2": 0.33432310924938496,
      "truthfulqa_mc2_stderr": 0.013335337173043119
    }
  },
  "versions": {
    "leaderboard|truthfulqa:mc|0": 0
  },
  "config_tasks": {
    "leaderboard|truthfulqa:mc": {
      "name": "truthfulqa:mc",
      "prompt_function": "truthful_qa_multiple_choice",
      "hf_repo": "truthful_qa",
      "hf_subset": "multiple_choice",
      "metric": [
        {
          "metric_name": [
            "truthfulqa_mc1",
            "truthfulqa_mc2"
          ],
          "higher_is_better": {
            "truthfulqa_mc1": true,
            "truthfulqa_mc2": true
          },
          "category": "8",
          "use_case": "1",
          "sample_level_fn": "truthfulqa_mc_metrics",
          "corpus_level_fn": {
            "truthfulqa_mc1": "mean",
            "truthfulqa_mc2": "mean"
          }
        }
      ],
      "hf_revision": null,
      "hf_filter": null,
      "hf_avail_splits": [
        "validation"
      ],
      "trust_dataset": true,
      "evaluation_splits": [
        "validation"
      ],
      "few_shots_split": null,
      "few_shots_select": null,
      "generation_size": -1,
      "generation_grammar": null,
      "stop_sequence": [
        "\n"
      ],
      "num_samples": null,
      "suite": [
        "leaderboard"
      ],
      "original_num_docs": 817,
      "effective_num_docs": 817,
      "must_remove_duplicate_docs": false,
      "version": 0
    }
  },
  "summary_tasks": {
    "leaderboard|truthfulqa:mc|0": {
      "hashes": {
        "hash_examples": "36a6d90e75d92d4a",
        "hash_full_prompts": "36a6d90e75d92d4a",
        "hash_input_tokens": "54863e78b01fe794",
        "hash_cont_tokens": "7d6f5aee3c38a72f"
      },
      "truncated": 0,
      "non_truncated": 817,
      "padded": 9216,
      "non_padded": 780,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    }
  },
  "summary_general": {
    "hashes": {
      "hash_examples": "aed1dfc67e53d0f2",
      "hash_full_prompts": "aed1dfc67e53d0f2",
      "hash_input_tokens": "1f841c18939d1dde",
      "hash_cont_tokens": "3f412ba4b35e4c16"
    },
    "truncated": 0,
    "non_truncated": 817,
    "padded": 9216,
    "non_padded": 780,
    "num_truncated_few_shots": 0
  }
}