{ "config_general": { "lighteval_sha": "?", "num_fewshot_seeds": 1, "max_samples": null, "job_id": 0, "start_time": 7391.813776305, "end_time": 7504.824690543, "total_evaluation_time_secondes": "113.01091423799971", "model_name": "Qwen/Qwen3-0.6B", "model_sha": "", "model_dtype": null, "model_size": null, "generation_parameters": { "early_stopping": null, "repetition_penalty": null, "frequency_penalty": null, "length_penalty": null, "presence_penalty": null, "max_new_tokens": null, "min_new_tokens": null, "seed": null, "stop_tokens": null, "temperature": null, "top_k": null, "min_p": null, "top_p": null, "truncate_prompt": null, "response_format": null } }, "results": { "original|mmlu:anatomy|0": { "acc": 0.1925925925925926, "acc_stderr": 0.03406542058502653 }, "all": { "acc": 0.1925925925925926, "acc_stderr": 0.03406542058502653 } }, "versions": { "original|mmlu:anatomy|0": 0 }, "config_tasks": { "original|mmlu:anatomy": { "name": "mmlu:anatomy", "prompt_function": "mmlu_anatomy", "hf_repo": "cais/mmlu", "hf_subset": "anatomy", "metric": [ { "metric_name": "acc", "higher_is_better": true, "category": "8", "use_case": "1", "sample_level_fn": "compute", "corpus_level_fn": "mean" } ], "hf_revision": null, "hf_filter": null, "hf_avail_splits": [ "auxiliary_train", "test", "validation", "dev" ], "trust_dataset": true, "evaluation_splits": [ "test" ], "few_shots_split": "dev", "few_shots_select": "sequential", "generation_size": 1, "generation_grammar": null, "stop_sequence": [ "\n" ], "num_samples": null, "suite": [ "original", "mmlu" ], "original_num_docs": 135, "effective_num_docs": 135, "must_remove_duplicate_docs": false, "version": 0 } }, "summary_tasks": { "original|mmlu:anatomy|0": { "hashes": { "hash_examples": "2ace6ded4afc2a5e", "hash_full_prompts": "2ace6ded4afc2a5e", "hash_input_tokens": "2208f6c9b1418fb3", "hash_cont_tokens": "b6f29f2efe9d60e5" }, "truncated": 0, "non_truncated": 135, "padded": 0, "non_padded": 540, "effective_few_shots": 0.0, "num_truncated_few_shots": 0 } }, "summary_general": { "hashes": { "hash_examples": "b4397c7c7be2716f", "hash_full_prompts": "b4397c7c7be2716f", "hash_input_tokens": "b6d70b8b90cdad6b", "hash_cont_tokens": "d0f0c11bb15a1909" }, "truncated": 0, "non_truncated": 135, "padded": 0, "non_padded": 540, "num_truncated_few_shots": 0 } }