[ { "agent_name": "Bgym-Llama-3-70b", "study_id": "study_id", "benchmark": "WorkArena-L1", "score": 17.9, "std_err": 0.6, "benchmark_specific": "No", "benchmark_tuned": "No", "followed_evaluation_protocol": "Yes", "reproducible": "Yes", "comments": "NA", "original_or_reproduced": "Original", "date_time": "2021-01-01 12:00:00" }, { "agent_name": "Bgym-Llama-3-70b", "study_id": "study_id", "benchmark": "WorkArena-L1", "score": 15.9, "std_err": 0.6, "benchmark_specific": "No", "benchmark_tuned": "No", "followed_evaluation_protocol": "Yes", "reproducible": "Yes", "comments": "NA", "original_or_reproduced": "Reproduced", "date_time": "2021-01-04 12:06:00" }, { "agent_name": "Bgym-Llama-3-70b", "study_id": "study_id", "benchmark": "WorkArena-L1", "score": 19.9, "std_err": 0.6, "benchmark_specific": "No", "benchmark_tuned": "No", "followed_evaluation_protocol": "Yes", "reproducible": "Yes", "comments": "NA", "original_or_reproduced": "Reproduced", "date_time": "2021-01-05 02:07:00" }, { "agent_name": "Bgym-Llama-3-70b", "study_id": "study_id", "benchmark": "WorkArena-L1", "score": 17.9, "std_err": 0.6, "benchmark_specific": "No", "benchmark_tuned": "No", "followed_evaluation_protocol": "Yes", "reproducible": "Yes", "comments": "NA", "original_or_reproduced": "Reproduced", "date_time": "2021-01-12 12:00:00" } ]