{ "config_general": { "lighteval_sha": "?", "num_fewshot_seeds": 1, "max_samples": null, "job_id": 0, "start_time": 6052.722710683, "end_time": 6448.248247955, "total_evaluation_time_secondes": "395.5255372720003", "model_name": "Qwen/Qwen3-4B", "model_sha": "", "model_dtype": null, "model_size": null, "generation_parameters": { "early_stopping": null, "repetition_penalty": null, "frequency_penalty": null, "length_penalty": null, "presence_penalty": null, "max_new_tokens": null, "min_new_tokens": null, "seed": null, "stop_tokens": null, "temperature": null, "top_k": null, "min_p": null, "top_p": null, "truncate_prompt": null, "response_format": null } }, "results": { "leaderboard|truthfulqa:mc|0": { "truthfulqa_mc1": 0.3671970624235006, "truthfulqa_mc1_stderr": 0.01687480500145318, "truthfulqa_mc2": 0.5475268776886557, "truthfulqa_mc2_stderr": 0.015802591856386182 }, "all": { "truthfulqa_mc1": 0.3671970624235006, "truthfulqa_mc1_stderr": 0.01687480500145318, "truthfulqa_mc2": 0.5475268776886557, "truthfulqa_mc2_stderr": 0.015802591856386182 } }, "versions": { "leaderboard|truthfulqa:mc|0": 0 }, "config_tasks": { "leaderboard|truthfulqa:mc": { "name": "truthfulqa:mc", "prompt_function": "truthful_qa_multiple_choice", "hf_repo": "truthful_qa", "hf_subset": "multiple_choice", "metric": [ { "metric_name": [ "truthfulqa_mc1", "truthfulqa_mc2" ], "higher_is_better": { "truthfulqa_mc1": true, "truthfulqa_mc2": true }, "category": "8", "use_case": "1", "sample_level_fn": "truthfulqa_mc_metrics", "corpus_level_fn": { "truthfulqa_mc1": "mean", "truthfulqa_mc2": "mean" } } ], "hf_revision": null, "hf_filter": null, "hf_avail_splits": [ "validation" ], "trust_dataset": true, "evaluation_splits": [ "validation" ], "few_shots_split": null, "few_shots_select": null, "generation_size": -1, "generation_grammar": null, "stop_sequence": [ "\n" ], "num_samples": null, "suite": [ "leaderboard" ], "original_num_docs": 817, "effective_num_docs": 817, "must_remove_duplicate_docs": false, "version": 0 } }, "summary_tasks": { "leaderboard|truthfulqa:mc|0": { "hashes": { "hash_examples": "36a6d90e75d92d4a", "hash_full_prompts": "36a6d90e75d92d4a", "hash_input_tokens": "13cc8c1ed56c3c97", "hash_cont_tokens": "2aa05ab785b97e1d" }, "truncated": 0, "non_truncated": 817, "padded": 0, "non_padded": 9996, "effective_few_shots": 0.0, "num_truncated_few_shots": 0 } }, "summary_general": { "hashes": { "hash_examples": "aed1dfc67e53d0f2", "hash_full_prompts": "aed1dfc67e53d0f2", "hash_input_tokens": "e0d42156d66ed07f", "hash_cont_tokens": "bfa433470f6b6989" }, "truncated": 0, "non_truncated": 817, "padded": 0, "non_padded": 9996, "num_truncated_few_shots": 0 } }