{"all_primary_scores": ["arc_challenge::olmes: 0.385666", "arc_challenge:mc::olmes: 0.253413", "arc_challenge:rc::olmes: 0.385666"], "tasks": [{"alias": "arc_challenge::olmes", "metrics": {"primary_score": 0.3856655290102389}, "num_instances": 2344, "processing_time": 10.335389375686646, "task_config": {"task_name": "arc_challenge::olmes", "task_core": "arc_challenge", "split": "test", "num_shots": 5, "primary_metric": "mc_or_rc", "metric_kwargs": {"extra_metric_names": ["no_answer"]}, "fewshot_source": "OLMES:ARC-Challenge", "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "version": 0, "metadata": {"num_tasks": 2, "description": "Best of MC vs RC", "used_mc_or_rc": "rc", "alias": "arc_challenge::olmes"}}}, {"alias": "arc_challenge:mc::olmes", "metrics": {"acc_raw": 0.25341296928327645, "acc_per_token": 0.25341296928327645, "acc_per_char": 0.25341296928327645, "acc_per_byte": 0.25341296928327645, "sum_logits_corr": -1.4381909157330672, "logits_per_token_corr": -1.4381909157330672, "logits_per_char_corr": -0.7190954578665336, "bits_per_byte_corr": 1.0374354509905723, "primary_score": 0.25341296928327645, "extra_metrics": {"no_answer": 0.0}}, "num_instances": 1172, "processing_time": 8.28089714050293, "task_config": {"task_name": "arc_challenge:mc", "task_core": "arc_challenge", "split": "test", "num_shots": 5, "primary_metric": "acc_raw", "metric_kwargs": {"extra_metric_names": ["no_answer"]}, "fewshot_source": "OLMES:ARC-Challenge", "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "version": 0, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "arc_challenge:mc::olmes"}}}, {"alias": "arc_challenge:rc::olmes", "metrics": {"acc_raw": 0.3293515358361775, "acc_per_token": 0.34726962457337884, "acc_per_char": 0.3438566552901024, "acc_per_byte": 0.3438566552901024, "sum_logits_corr": -15.75188249934452, "logits_per_token_corr": -2.916146519577311, "logits_per_char_corr": -0.5858619454662958, "bits_per_byte_corr": 0.8440443257845128, "acc_uncond": 0.3856655290102389, "primary_score": 0.3856655290102389, "extra_metrics": {"no_answer": 0.0}}, "num_instances": 1172, "processing_time": 2.054492235183716, "task_config": {"task_name": "arc_challenge", "task_core": "arc_challenge", "split": "test", "num_shots": 5, "primary_metric": "acc_uncond", "metric_kwargs": {"uncond_docid_offset": 1000000, "extra_metric_names": ["no_answer"]}, "fewshot_source": "OLMES:ARC-Challenge", "dataset_path": "ai2_arc", "dataset_name": "ARC-Challenge", "version": 0, "metadata": {"regimes": ["OLMES-v0.1"], "alias": "arc_challenge:rc::olmes"}}}], "model_config": {"model": "allenai/OLMo-1B-hf", "metadata": {"alias": "olmo-1b"}}}