{ "config_general": { "lighteval_sha": "?", "num_fewshot_seeds": 1, "max_samples": null, "job_id": 0, "start_time": 3333.242390676, "end_time": 3866.725562865, "total_evaluation_time_secondes": "533.483172189", "model_name": "allenai/OLMo-2-0425-1B", "model_sha": "1352aa4026bfcdea35bc2397d5c0f1c138970593", "model_dtype": null, "model_size": "5.53 GB", "generation_parameters": { "early_stopping": null, "repetition_penalty": null, "frequency_penalty": null, "length_penalty": null, "presence_penalty": null, "max_new_tokens": null, "min_new_tokens": null, "seed": null, "stop_tokens": null, "temperature": null, "top_k": null, "min_p": null, "top_p": null, "truncate_prompt": null, "response_format": null } }, "results": { "leaderboard|truthfulqa:mc|0": { "truthfulqa_mc1": 0.23133414932680538, "truthfulqa_mc1_stderr": 0.014761945174862673, "truthfulqa_mc2": 0.3685296588302264, "truthfulqa_mc2_stderr": 0.01359981212303006 }, "all": { "truthfulqa_mc1": 0.23133414932680538, "truthfulqa_mc1_stderr": 0.014761945174862673, "truthfulqa_mc2": 0.3685296588302264, "truthfulqa_mc2_stderr": 0.01359981212303006 } }, "versions": { "leaderboard|truthfulqa:mc|0": 0 }, "config_tasks": { "leaderboard|truthfulqa:mc": { "name": "truthfulqa:mc", "prompt_function": "truthful_qa_multiple_choice", "hf_repo": "truthful_qa", "hf_subset": "multiple_choice", "metric": [ { "metric_name": [ "truthfulqa_mc1", "truthfulqa_mc2" ], "higher_is_better": { "truthfulqa_mc1": true, "truthfulqa_mc2": true }, "category": "8", "use_case": "1", "sample_level_fn": "truthfulqa_mc_metrics", "corpus_level_fn": { "truthfulqa_mc1": "mean", "truthfulqa_mc2": "mean" } } ], "hf_revision": null, "hf_filter": null, "hf_avail_splits": [ "validation" ], "trust_dataset": true, "evaluation_splits": [ "validation" ], "few_shots_split": null, "few_shots_select": null, "generation_size": -1, "generation_grammar": null, "stop_sequence": [ "\n" ], "num_samples": null, "suite": [ "leaderboard" ], "original_num_docs": 817, "effective_num_docs": 817, "must_remove_duplicate_docs": false, "version": 0 } }, "summary_tasks": { "leaderboard|truthfulqa:mc|0": { "hashes": { "hash_examples": "36a6d90e75d92d4a", "hash_full_prompts": "36a6d90e75d92d4a", "hash_input_tokens": "404fdea99381fcbe", "hash_cont_tokens": "cc3f0628ee1c9d08" }, "truncated": 0, "non_truncated": 817, "padded": 9361, "non_padded": 635, "effective_few_shots": 0.0, "num_truncated_few_shots": 0 } }, "summary_general": { "hashes": { "hash_examples": "aed1dfc67e53d0f2", "hash_full_prompts": "aed1dfc67e53d0f2", "hash_input_tokens": "2b653e3dd18a0749", "hash_cont_tokens": "ca187de990176e1e" }, "truncated": 0, "non_truncated": 817, "padded": 9361, "non_padded": 635, "num_truncated_few_shots": 0 } }