diff --git a/output/amazon.nova-lite-v1/BFCL_v3_irrelevance.parquet b/output/amazon.nova-lite-v1/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..be217793e556d17015051d6a6d882b9ca0547758 --- /dev/null +++ b/output/amazon.nova-lite-v1/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2dcd1616deeaea3383dfa39a9102d0c2d992edda3765b02a2bd8f8aeddf58c30 +size 48466 diff --git a/output/amazon.nova-lite-v1/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/amazon.nova-lite-v1/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..94620c25d16fb311a037adb3c9322a57239ce9c0 --- /dev/null +++ b/output/amazon.nova-lite-v1/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15fa0901354be2498d0d4a47eac7a15314eff7efd7e3a21a256e1e974706f0c0 +size 25033 diff --git a/output/amazon.nova-lite-v1/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/amazon.nova-lite-v1/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..31d1ea218b6ce5e08614ba0a97957d8cf070cc10 --- /dev/null +++ b/output/amazon.nova-lite-v1/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d62ce4ab5f9f7f7718e8a1189f54bad178d11c208e4a7c56734d7f8df29ed513 +size 22775 diff --git a/output/amazon.nova-lite-v1/BFCL_v3_multi_turn_composite.parquet b/output/amazon.nova-lite-v1/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..eac1c13a2958472e225f4c799047ee10f54e71e4 --- /dev/null +++ b/output/amazon.nova-lite-v1/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34a9a5b499d819081a1172dc4c9ab816d1bb47dab83f0986eabaaea2fcc397c0 +size 55383 diff --git a/output/amazon.nova-lite-v1/BFCL_v3_multi_turn_long_context.parquet b/output/amazon.nova-lite-v1/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c0bee6580d2d41113986cf2a4b2e687b34f12837 --- /dev/null +++ b/output/amazon.nova-lite-v1/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d759bc9e89235b76c85fb1d71541b1b65228b76f71bc51bfa751e0cb4ffc6ef4 +size 40658 diff --git a/output/amazon.nova-lite-v1/BFCL_v3_multi_turn_miss_func.parquet b/output/amazon.nova-lite-v1/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..23a2e6098d75f77ee029c4804231ed80029eb359 --- /dev/null +++ b/output/amazon.nova-lite-v1/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:810d26e3c521acfc7d89b1c4c58b6a455e70c94cf4fd9c95e54f70d2e067df23 +size 53383 diff --git a/output/amazon.nova-lite-v1/BFCL_v3_multi_turn_miss_param.parquet b/output/amazon.nova-lite-v1/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..40d1650305a8e3cb041f6e2553d5719ee848d8cf --- /dev/null +++ b/output/amazon.nova-lite-v1/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3e1b14de9ee6fa1050414d36b8836e817b8ec630b8fee11ef0d4921c42d378da +size 50674 diff --git a/output/amazon.nova-lite-v1/tau_long_context.parquet b/output/amazon.nova-lite-v1/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..edda9767ef5ba675e7b8909a02a7a03c20fda422 --- /dev/null +++ b/output/amazon.nova-lite-v1/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1010301a63be8936b2753ab6900d33606bc5578183a9471fc48a74f53dfb9e56 +size 61971 diff --git a/output/amazon.nova-lite-v1/toolace_single_func_call_1.parquet b/output/amazon.nova-lite-v1/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..08d1761cf4ca3ac69832ad0945f6d3a8ed20da15 --- /dev/null +++ b/output/amazon.nova-lite-v1/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f5d71d21e1b4fac81282e04c555bb1c2339cb4dd4f0557a8af3e48bd083888d +size 19890 diff --git a/output/amazon.nova-lite-v1/toolace_single_func_call_2.parquet b/output/amazon.nova-lite-v1/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..f039046b7b009633430e604badfb3b5250eebabc --- /dev/null +++ b/output/amazon.nova-lite-v1/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c74206661ddc9f85744202a89626d3b1982de9624a14bc4239b81a6d0293a41 +size 14211 diff --git a/output/amazon.nova-lite-v1/xlam_multiple_tool_multiple_call.parquet b/output/amazon.nova-lite-v1/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c632be0df9c784dbd6ecd2013e24c1bfdb541acd --- /dev/null +++ b/output/amazon.nova-lite-v1/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33096da4b9f925e0d5f843bcc4c4cbab018b7478f5cde2d8e3ce5e642c40e3d0 +size 133099 diff --git a/output/amazon.nova-lite-v1/xlam_multiple_tool_single_call.parquet b/output/amazon.nova-lite-v1/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..3c00c8c694579aebdc767c340a8ea295f8027dd9 --- /dev/null +++ b/output/amazon.nova-lite-v1/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fce63d3f2da0a90318d08253990879194675926d3d9ac6eb4481cf21f0324374 +size 53551 diff --git a/output/amazon.nova-lite-v1/xlam_single_tool_multiple_call.parquet b/output/amazon.nova-lite-v1/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..81480db1b136bd03af26a91c6e7cca6ff123c1ef --- /dev/null +++ b/output/amazon.nova-lite-v1/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d62b0e37dc3e3b66004740ab882e323a6ca4b8f4b4773f730147ab3344da564d +size 38370 diff --git a/output/amazon.nova-lite-v1/xlam_single_tool_single_call.parquet b/output/amazon.nova-lite-v1/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..52ed104d2ef17ec82419527cdee62787402233a3 --- /dev/null +++ b/output/amazon.nova-lite-v1/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eac98803ef4fc1c8c881ffdfa005e9375e58df379d749c0af12206b1b0f53123 +size 58911 diff --git a/output/amazon.nova-lite-v1/xlam_tool_miss.parquet b/output/amazon.nova-lite-v1/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..5e6c2ef718d6f36db36f4b75ba1a002870037e9d --- /dev/null +++ b/output/amazon.nova-lite-v1/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:473ddb3aa98621682fb427699f62a0690b8637032b4307e1dd877afeb533e484 +size 67365 diff --git a/output/amazon.nova-micro-v1/BFCL_v3_irrelevance.parquet b/output/amazon.nova-micro-v1/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d772fc4da5dff3022ae873b989bbc8e12746eb94 --- /dev/null +++ b/output/amazon.nova-micro-v1/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:31575e7952951b0833c904f374fa8f05995524549e60c39231b48831ad874be2 +size 53061 diff --git a/output/amazon.nova-micro-v1/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/amazon.nova-micro-v1/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..6a9e2d50d126939be4790a23cbea117f7dd41f75 --- /dev/null +++ b/output/amazon.nova-micro-v1/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a7f43c9f8a9f28a191176b16eeda742a4c7b2cdc9635afae0311ea157b61057 +size 25271 diff --git a/output/amazon.nova-micro-v1/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/amazon.nova-micro-v1/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..18b29135175e3106cd8d67af122bbbfcfbaa1afd --- /dev/null +++ b/output/amazon.nova-micro-v1/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71d29a0806809628f9c4c76eb5546967c48d4d905815c2cdeaaea3b036ed5e60 +size 25264 diff --git a/output/amazon.nova-micro-v1/BFCL_v3_multi_turn_composite.parquet b/output/amazon.nova-micro-v1/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..0a1574e388fce0b6134209bcf797c6338be13b72 --- /dev/null +++ b/output/amazon.nova-micro-v1/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:174e4210d557b2e76ef854847a8fca342cff7fffb7fb0847fc91cbdda0cce723 +size 61189 diff --git a/output/amazon.nova-micro-v1/BFCL_v3_multi_turn_long_context.parquet b/output/amazon.nova-micro-v1/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..7ef9f6f428edca66728f8ad69643b5b14f1f3fae --- /dev/null +++ b/output/amazon.nova-micro-v1/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bcaae3130de3aecc8789f4d765d11b6bf3cb415b1d34b5c72a6cd4d9c0d8f948 +size 42554 diff --git a/output/amazon.nova-micro-v1/BFCL_v3_multi_turn_miss_func.parquet b/output/amazon.nova-micro-v1/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..8ea38aaabf920327650c48c4d9002e7534da691b --- /dev/null +++ b/output/amazon.nova-micro-v1/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bca5adb09d5bb4cbd4a97471f037dfb780187aca0d537d954ded60829a1e4c74 +size 54895 diff --git a/output/amazon.nova-micro-v1/BFCL_v3_multi_turn_miss_param.parquet b/output/amazon.nova-micro-v1/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4d50e98d1388f5a7af11823b2366c94b50af1cb1 --- /dev/null +++ b/output/amazon.nova-micro-v1/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fba1a48800c573a9156a1318c6d614b008c7b2e8e6639b9cf7ed2d38fc3e800 +size 51898 diff --git a/output/amazon.nova-micro-v1/tau_long_context.parquet b/output/amazon.nova-micro-v1/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..b5e3a6374bd48e718422f744177a94d69cdce350 --- /dev/null +++ b/output/amazon.nova-micro-v1/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f1dfb265f5f2fac1d7e9294621a2fa12d094028184c06401416ae904d40f9d6 +size 69358 diff --git a/output/amazon.nova-micro-v1/toolace_single_func_call_1.parquet b/output/amazon.nova-micro-v1/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..b029e41d9554a0aacf57deba5a141cd0a55ae3d7 --- /dev/null +++ b/output/amazon.nova-micro-v1/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a68173f8065d8b11f43d762af26c1df0e89a45d5ed5f8058306930e46ad4d0c7 +size 22350 diff --git a/output/amazon.nova-micro-v1/toolace_single_func_call_2.parquet b/output/amazon.nova-micro-v1/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..93369a34a8310e489c2c23b8d7ab8b822d2ed4ee --- /dev/null +++ b/output/amazon.nova-micro-v1/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89e42e0bfca8e17d99ee003569924838f4951869f3e3415e4bcc995ca9350682 +size 16541 diff --git a/output/amazon.nova-micro-v1/xlam_multiple_tool_multiple_call.parquet b/output/amazon.nova-micro-v1/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9254e0d1c3cb9c100c936854348967211ec133f4 --- /dev/null +++ b/output/amazon.nova-micro-v1/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac7ff4d1f9820a1d99a07176043690c6da679728fb9b75106b99689e96816759 +size 146318 diff --git a/output/amazon.nova-micro-v1/xlam_multiple_tool_single_call.parquet b/output/amazon.nova-micro-v1/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..44c547cda60930335197d70b6a8acd1de2274e38 --- /dev/null +++ b/output/amazon.nova-micro-v1/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3d8f3e76bf66b24d9782bbed388636788f43b3af038e0f48ecc0c92ee6fc6b0 +size 63462 diff --git a/output/amazon.nova-micro-v1/xlam_single_tool_multiple_call.parquet b/output/amazon.nova-micro-v1/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c27a656263a7cd25a62d4fd2f591ac50217db36e --- /dev/null +++ b/output/amazon.nova-micro-v1/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d8d315f92753522c35e7e2e707def7a24d4f24acf81adcaf1da8800ef896cc5 +size 43319 diff --git a/output/amazon.nova-micro-v1/xlam_single_tool_single_call.parquet b/output/amazon.nova-micro-v1/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..cef7acc8517f1ecbdec55d0230460cc5b9bc2cff --- /dev/null +++ b/output/amazon.nova-micro-v1/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ab6ce6d294a474a471c7cd1299a0a819a1ef6acf8932a885b81813cb4fde75d +size 63580 diff --git a/output/amazon.nova-micro-v1/xlam_tool_miss.parquet b/output/amazon.nova-micro-v1/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e913e5a4ed620897e79a60bdf72c05b8294475c2 --- /dev/null +++ b/output/amazon.nova-micro-v1/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d7db3f4c1181e314769c2d89813b07329fe691d8e73d23f3c369ad9242893df +size 78774 diff --git a/output/amazon.nova-pro-v1/BFCL_v3_irrelevance.parquet b/output/amazon.nova-pro-v1/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c3369ced7958434d9555a034364f92fcc7c3534e --- /dev/null +++ b/output/amazon.nova-pro-v1/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8354b8b7c9c2fbeabd1eb7bc3df7f32a3670400cb9b7bad9086b37f97b91b92 +size 47087 diff --git a/output/amazon.nova-pro-v1/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/amazon.nova-pro-v1/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..7215c28ad5ee20582fed3ded7968156f87184b6c --- /dev/null +++ b/output/amazon.nova-pro-v1/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dc0248cd772ea635c79d528ac31ca41230987e2c7af0835ee763d31b673d8546 +size 24981 diff --git a/output/amazon.nova-pro-v1/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/amazon.nova-pro-v1/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..8c8fee04cba970dce4384fcaecdd18b98d66bf5d --- /dev/null +++ b/output/amazon.nova-pro-v1/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:362935e02f5b23ef457aa992e4700b0476879237845ba0f640533b25f94ea19a +size 23609 diff --git a/output/amazon.nova-pro-v1/BFCL_v3_multi_turn_composite.parquet b/output/amazon.nova-pro-v1/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..13b4ec40597210f4b77e095937f1cc6a211858c8 --- /dev/null +++ b/output/amazon.nova-pro-v1/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a8db125e5ab58b03da2ef7cf184abb804bb617590fdbaa7b5d6b0908450069d +size 53424 diff --git a/output/amazon.nova-pro-v1/BFCL_v3_multi_turn_long_context.parquet b/output/amazon.nova-pro-v1/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..cc5dffaa46dbb8c06f67de9669f032e03502d6ec --- /dev/null +++ b/output/amazon.nova-pro-v1/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47ccca817f549a9db204e55697fdc0dbe6230a7d3094d5b7909e1e95739f8f3f +size 37944 diff --git a/output/amazon.nova-pro-v1/BFCL_v3_multi_turn_miss_func.parquet b/output/amazon.nova-pro-v1/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..0a98e151ceab21b135e8bdf3225fc8c6500ac057 --- /dev/null +++ b/output/amazon.nova-pro-v1/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01d9621e7ab87a8fc8343bcc7c6a2c9112263553d590c5b0ea03cd0335f11e7f +size 49128 diff --git a/output/amazon.nova-pro-v1/BFCL_v3_multi_turn_miss_param.parquet b/output/amazon.nova-pro-v1/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..15a6d5d5137581bba5d4bf32d1d772f349b5bd5c --- /dev/null +++ b/output/amazon.nova-pro-v1/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:702d1cb83c0c1d92dc04112a297841aa4716044956aae8160bb336de264e2aa0 +size 50128 diff --git a/output/amazon.nova-pro-v1/tau_long_context.parquet b/output/amazon.nova-pro-v1/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ed360dee9a18bc18e109ad3d1c0927e8d3672993 --- /dev/null +++ b/output/amazon.nova-pro-v1/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8109ff5cfcef34287c8b37b65938f1a667dd87f59a9aad3e6187375c5e6ab55e +size 61166 diff --git a/output/amazon.nova-pro-v1/toolace_single_func_call_1.parquet b/output/amazon.nova-pro-v1/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..58a7a10f2bcd260536a3aae55a874999f06b99df --- /dev/null +++ b/output/amazon.nova-pro-v1/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74ff07cb9a13c34a7d1472a4c532498f12c505ef44759e354fed172ed7ade520 +size 19610 diff --git a/output/amazon.nova-pro-v1/toolace_single_func_call_2.parquet b/output/amazon.nova-pro-v1/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..b23151ee462f2587d51d78116691ad6a1dbe0df0 --- /dev/null +++ b/output/amazon.nova-pro-v1/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:832cd7d22cac68defbf561251c74623c3efcd03febe34cd093f250c2b101614d +size 15538 diff --git a/output/amazon.nova-pro-v1/xlam_multiple_tool_multiple_call.parquet b/output/amazon.nova-pro-v1/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ee9e3df4ae2131abf2e7f8753e0356ad89bc7f6b --- /dev/null +++ b/output/amazon.nova-pro-v1/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9207c681f9efdd81020de0831b3df9082c291bf8791bab84373bbbf04a9c951 +size 139562 diff --git a/output/amazon.nova-pro-v1/xlam_multiple_tool_single_call.parquet b/output/amazon.nova-pro-v1/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..0597eb4501f6a3217f6a4d1bc47f381c2fc7d620 --- /dev/null +++ b/output/amazon.nova-pro-v1/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e7d8ada03bf36edc50d1fe9cc449d0d312b6de3a0e22141a26b483a17573120 +size 58265 diff --git a/output/amazon.nova-pro-v1/xlam_single_tool_multiple_call.parquet b/output/amazon.nova-pro-v1/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e9d2da7e2ae2f6c822dfbd5d73df8bdce75b96d4 --- /dev/null +++ b/output/amazon.nova-pro-v1/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2116286125a19e38b81ba3088aefaa6502d9867bf5cf5f199de01b8b29e1fc4f +size 38762 diff --git a/output/amazon.nova-pro-v1/xlam_single_tool_single_call.parquet b/output/amazon.nova-pro-v1/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..34d8e6cb53d636f7c6907f5af5014fdac5fd66e2 --- /dev/null +++ b/output/amazon.nova-pro-v1/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7924bbfe3b182ea6c7e370c8c36550309ab9b08d016c1160506bcbc9ddbf9cd +size 61094 diff --git a/output/amazon.nova-pro-v1/xlam_tool_miss.parquet b/output/amazon.nova-pro-v1/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..001442e6e0ca096dffb2e44e91bd8030c04932ec --- /dev/null +++ b/output/amazon.nova-pro-v1/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c02b949bd37bf76d89df3567c6cbb6a72d43328b4561d652f52c95dec9b7755 +size 67286 diff --git a/output/mistral-small-2503/BFCL_v3_irrelevance.parquet b/output/mistral-small-2503/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..1bcdecd6c3abf59145a8ea395a18ed72827118eb --- /dev/null +++ b/output/mistral-small-2503/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83d59b6fa3d5bec4705d3684807dcd5f3736d7f943bd6c6ac39df9b2c9a75709 +size 37163 diff --git a/output/mistral-small-2503/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/mistral-small-2503/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..f4c75cc65ab174bbb25814f454c5114b8c041093 --- /dev/null +++ b/output/mistral-small-2503/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:002a6c91fe85ecd9b22a13d9b480680d4bc291a5429321643062a2535a5e34ee +size 23394 diff --git a/output/mistral-small-2503/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/mistral-small-2503/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..cfd5d26b470321e1740cb84cdba08884b8c2f098 --- /dev/null +++ b/output/mistral-small-2503/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bee6116b4572d9695bdab3453648e0ebaa378838508690a8f0ff54f1b25372e +size 22208 diff --git a/output/mistral-small-2503/BFCL_v3_multi_turn_composite.parquet b/output/mistral-small-2503/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..dc52577ab489f75c68fa9173e8934d61daa38bff --- /dev/null +++ b/output/mistral-small-2503/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:065613b6a0d491b2df12664c7d07054b6d60dc91bad1b5520dcc1c892387750a +size 44173 diff --git a/output/mistral-small-2503/BFCL_v3_multi_turn_long_context.parquet b/output/mistral-small-2503/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..5bffafe2bc5c2bc8cf1fdda255b8afdb38e7370d --- /dev/null +++ b/output/mistral-small-2503/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b63d4b4123833e1b08ddb277ea06d795e27a1d52d0858053931ccb2598128e6 +size 37148 diff --git a/output/mistral-small-2503/BFCL_v3_multi_turn_miss_func.parquet b/output/mistral-small-2503/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c598a2d2a18e74c939e1cc8f7843fb0d07e48bb0 --- /dev/null +++ b/output/mistral-small-2503/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3a92773df3905831eb6bec9fab2ce0bbbaf091803772340cdbb04990daa9ddd +size 43655 diff --git a/output/mistral-small-2503/BFCL_v3_multi_turn_miss_param.parquet b/output/mistral-small-2503/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..a5fd06e4987baf427bf619c86846f78e6c630d8e --- /dev/null +++ b/output/mistral-small-2503/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e9f1b0ceb01c5ba3954b3e4aaf3753a39504dbb42cc3bb02c12d1ec192b37d8f +size 43451 diff --git a/output/mistral-small-2503/tau_long_context.parquet b/output/mistral-small-2503/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..fb1373486d74ebd79119c4297514ab1b291f016c --- /dev/null +++ b/output/mistral-small-2503/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf1b429b427b6ce2e1f7271384ca0dcefa61ce30898d30eed36a0c5c54a6901e +size 43146 diff --git a/output/mistral-small-2503/toolace_single_func_call_1.parquet b/output/mistral-small-2503/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..50b2bb036a8ddf818c82aaa9270d405851c20c1e --- /dev/null +++ b/output/mistral-small-2503/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe315f4880a3b7c5d76d15515e1110a8b75c511b7ea0942b5cf7e7840269bc61 +size 14578 diff --git a/output/mistral-small-2503/toolace_single_func_call_2.parquet b/output/mistral-small-2503/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..48f5b04ef9b2adccee8b1d5b94ffb819745be8a7 --- /dev/null +++ b/output/mistral-small-2503/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0beee366878a26e1a56fccdb497606f1ddce03818dfb3713a167bf3c744b7714 +size 12600 diff --git a/output/mistral-small-2503/xlam_multiple_tool_multiple_call.parquet b/output/mistral-small-2503/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..67d2366e875ed1190ddec0b28ae9b73f2e05e038 --- /dev/null +++ b/output/mistral-small-2503/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5befaf630e6259340c5d5d94fb05f9c28ed3e20fe0963e712814b4af5511a24f +size 94127 diff --git a/output/mistral-small-2503/xlam_multiple_tool_single_call.parquet b/output/mistral-small-2503/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..fa94b9f5d8a88eb1b615c4feef916928933d16b4 --- /dev/null +++ b/output/mistral-small-2503/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6368437f681a19fe51dc32fd7bd727b6f37a46e2234867bdaa3628434354de2d +size 37913 diff --git a/output/mistral-small-2503/xlam_single_tool_multiple_call.parquet b/output/mistral-small-2503/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..a01d98c44010918f518c5225db96b5335e4438b2 --- /dev/null +++ b/output/mistral-small-2503/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3202627a0ae360dac25519f054fcabc320b9865c389992e24233e09708d1fa81 +size 30355 diff --git a/output/mistral-small-2503/xlam_single_tool_single_call.parquet b/output/mistral-small-2503/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..f12cbcfd7fbe9d1e8afeb0dd010cd06348293c10 --- /dev/null +++ b/output/mistral-small-2503/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ef76828b77c503f45fe54f6d7f41d58a7bc322f593d32cf69e6c007c66dd980 +size 44705 diff --git a/output/mistral-small-2503/xlam_tool_miss.parquet b/output/mistral-small-2503/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e54464dbee0118e1e43436cff283030904e5c23c --- /dev/null +++ b/output/mistral-small-2503/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3483f2a787130fdee483ca52cab3a1bac39cbb1bdab4064073dc2b43822b8e20 +size 45886 diff --git a/output/palmyra-x-004/BFCL_v3_irrelevance.parquet b/output/palmyra-x-004/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..7502f121ea56c4f3b3c7398890f4c673692d6715 --- /dev/null +++ b/output/palmyra-x-004/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12c857ae98763ff945741d7ee16597ad3e3c99652889aacb73d88f369d1afad4 +size 39480 diff --git a/output/palmyra-x-004/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/palmyra-x-004/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..b7eead34f7a9483d124258186e734f9bf38d4ba5 --- /dev/null +++ b/output/palmyra-x-004/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc2150e7462e30f6c2a4e1dec098b6000a55d659e4f27f5b2c28759ab55c4cd9 +size 27121 diff --git a/output/palmyra-x-004/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/palmyra-x-004/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..1b447d09c07839ad42c6ca615fe168fc8221cc45 --- /dev/null +++ b/output/palmyra-x-004/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b2079de7e06129482effaa1330289b75bc0feacc50706e555513babd4278e3b +size 24860 diff --git a/output/palmyra-x-004/BFCL_v3_multi_turn_composite.parquet b/output/palmyra-x-004/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..95c9790a8db0f096871bf64959476d437138a8a3 --- /dev/null +++ b/output/palmyra-x-004/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8bf50a8d0cba7af40cfa9bef554214aa5a6002be73a8d53e9fc50e7a2c5b581 +size 50811 diff --git a/output/palmyra-x-004/BFCL_v3_multi_turn_long_context.parquet b/output/palmyra-x-004/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4c84a1ac01c8fd62b6ff09d97e3d02a5716b8e79 --- /dev/null +++ b/output/palmyra-x-004/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1008c35134a2101802a380502be401d85250fc11471bc5c84d025c74ebda62b5 +size 44066 diff --git a/output/palmyra-x-004/BFCL_v3_multi_turn_miss_func.parquet b/output/palmyra-x-004/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..2a4b372b2b308cea43e3b9e7b98973cd7657963f --- /dev/null +++ b/output/palmyra-x-004/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c4e4d81da7d7af99e97aa58da93f1dc182a92cd02e4113d9d5a2cb529838148 +size 50290 diff --git a/output/palmyra-x-004/BFCL_v3_multi_turn_miss_param.parquet b/output/palmyra-x-004/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..75b1f8363c0402b028ee7f4cd3c6215e008e024a --- /dev/null +++ b/output/palmyra-x-004/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:834d8b71fc767a48588563cb26593719fbe1decace45c8ede913ad39b59115cf +size 50509 diff --git a/output/palmyra-x-004/tau_long_context.parquet b/output/palmyra-x-004/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..0007ba18d1efd78bfd575d9dfe3301133be618ac --- /dev/null +++ b/output/palmyra-x-004/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5458e269f44af3a2e2a91965ebcb50f51e5496af179d752eb63bb8ce34a5dafc +size 52332 diff --git a/output/palmyra-x-004/toolace_single_func_call_1.parquet b/output/palmyra-x-004/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e1289e5e3076fa58601ac696f0ebba9364f0580f --- /dev/null +++ b/output/palmyra-x-004/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3c068a6751632777dabcaa72e4a8c7bc6d60523e4ae9dc491cbd889ecaaabba +size 17802 diff --git a/output/palmyra-x-004/toolace_single_func_call_2.parquet b/output/palmyra-x-004/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d833e4db2fc604f6706f80a6166a2e91547b326e --- /dev/null +++ b/output/palmyra-x-004/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f2dcac065e1f3c17603795aceb7aa85b7f9a58a732c0ab779d15f0823185867 +size 13217 diff --git a/output/palmyra-x-004/xlam_multiple_tool_multiple_call.parquet b/output/palmyra-x-004/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e5b013d7e0ef8f7f98cad66442f92ac3dfca0835 --- /dev/null +++ b/output/palmyra-x-004/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27402b96a92b5794e7f3323371c3657f97ed43bab2a33af5e5760f251512bec0 +size 102286 diff --git a/output/palmyra-x-004/xlam_multiple_tool_single_call.parquet b/output/palmyra-x-004/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..34c2ccf4cc345a7b4693c6e61f64c98a9d9eaece --- /dev/null +++ b/output/palmyra-x-004/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8880ca68b26bc3e70d8cb1f7e9bfa09464b04795d02a721f05b6bff9a575a0d4 +size 35003 diff --git a/output/palmyra-x-004/xlam_single_tool_multiple_call.parquet b/output/palmyra-x-004/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..8e09b9ae5403f60d0c3858a7efe4b9e406aafbf2 --- /dev/null +++ b/output/palmyra-x-004/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8aa356713846fbbe244c55f55dd82eb5a47a7cf804c7f60c52793929d707e116 +size 32046 diff --git a/output/palmyra-x-004/xlam_single_tool_single_call.parquet b/output/palmyra-x-004/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e84d845904c3883083f11efe36443835aef46ab7 --- /dev/null +++ b/output/palmyra-x-004/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:124edf2c75a6c40dbea27243ed4281e1d37d0f5388622c0365bf76b659dedc38 +size 43105 diff --git a/output/palmyra-x-004/xlam_tool_miss.parquet b/output/palmyra-x-004/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..b1039c50370e2cf4603058f94a31e39c8de2678b --- /dev/null +++ b/output/palmyra-x-004/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf8d698cffb01dd30ae64f469f138c446826c908ce99f203fbb5b8b996835b85 +size 53919 diff --git a/results.csv b/results.csv index 14f788d0744d3db4b18e28ffe4cfc98e3b305deb..caee82085b65841954d6834d1d7d323521ac38b6 100644 --- a/results.csv +++ b/results.csv @@ -2,14 +2,19 @@ Model,Model Type,Model Output Type,Vendor,Input cost per million token,Output co claude-3-7-sonnet-20250219,Private,Reasoning,Anthropic,3,15,0.953,0.96,0.95,0.92,0.96,1,0.95,0.97,1,0.96,0.94,0.97,0.96,0.99,0.82,0.92,0.975 gemini-2.0-flash-001,Private,Normal,Google,0.15,0.6,0.938,0.95,0.93,0.91,0.94,0.9,0.96,0.92,0.95,0.89,0.91,0.98,0.93,0.97,0.98,0.93,0.965 gemini-2.0-flash-lite-001,Private,Normal,Google,0.075,0.3,0.933,0.96,0.91,0.81,0.98,0.98,0.9,0.91,0.92,0.98,0.86,0.99,0.87,0.97,0.96,0.95,0.975 +mistral-small-2503,Open source,Normal,Mistral,0.1,0.3,0.912,0.93,0.89,0.85,0.93,0.86,0.91,0.9,1,0.83,0.81,0.99,0.87,0.99,0.95,0.9,0.975 gpt-4o-2024-11-20,Private,Normal,OpenAI,2.5,10,0.900,0.92,0.88,0.85,0.9,0.92,0.95,0.88,0.99,0.63,0.83,0.98,0.89,0.98,0.98,0.86,0.965 gpt-4.5-preview-2025-02-27,Private,Normal,OpenAI,75,150,0.900,0.93,0.87,0.85,0.91,0.92,0.97,0.92,0.99,0.67,0.85,0.98,0.85,1,0.98,0.8,0.915 gemini-1.5-flash,Private,Normal,Google,0.075,0.3,0.895,0.88,0.91,0.9,0.9,0.89,0.87,0.91,0.83,0.71,0.87,0.98,0.89,0.94,0.93,0.92,0.99 +palmyra-x-004,Private,Normal,Writer,5,12,0.886,0.92,0.85,0.91,0.78,0.89,0.94,0.84,0.97,0.69,0.86,1,0.76,1,0.98,0.84,0.95 gemini-1.5-pro,Private,Normal,Google,1.25,5,0.885,0.87,0.91,0.89,0.93,0.75,0.97,0.9,0.87,0.57,0.91,0.94,0.92,0.99,0.97,0.86,0.925 o1-2024-12-17,Private,Reasoning,OpenAI,15,60,0.876,0.83,0.92,0.89,0.92,0.98,0.71,0.91,0.99,0.73,0.88,0.98,0.96,1,0.43,0.94,0.95 +amazon.nova-pro-v1,Private,Normal,Amazon,0.8,3.2,0.868,0.94,0.79,0.77,0.81,0.94,0.97,0.73,0.93,0.93,0.78,0.92,0.81,0.94,0.97,0.75,0.9 +amazon.nova-lite-v1,Private,Normal,Amazon,0.06,0.24,0.868,0.91,0.83,0.83,0.87,0.83,0.9,0.9,0.93,0.91,0.75,0.94,0.74,0.88,0.96,0.78,0.925 o3-mini-2025-01-31,Private,Reasoning,OpenAI,1.1,4.4,0.847,0.80,0.90,0.87,0.91,0.84,0.72,0.93,0.98,0.63,0.85,0.97,0.84,1,0.43,0.91,0.975 mistral-small-2501,Open source,Normal,Mistral,0.1,0.3,0.832,0.88,0.78,0.83,0.78,0.92,0.97,0.76,0.99,0.62,0.8,0.82,0.77,0.95,0.92,0.74,0.775 gpt-4o-mini,Private,Normal,OpenAI,0.15,0.6,0.832,0.85,0.82,0.82,0.85,0.51,0.98,0.83,1,0.54,0.83,0.94,0.83,0.96,0.99,0.73,0.835 +amazon.nova-micro-v1,Private,Normal,Amazon,0.035,0.14,0.829,0.90,0.75,0.77,0.79,0.8,0.97,0.69,0.87,0.89,0.74,0.93,0.68,0.91,0.96,0.7,0.91 qwen2.5-72b-instruct,Open source,Normal,Alibaba,0.9,0.9,0.817,0.80,0.84,0.84,0.87,0.92,0.63,0.86,0.99,0.66,0.79,0.99,0.77,0.97,0.42,0.78,0.95 mistral-large-2411,Private,Normal,Mistral,2,6,0.810,0.87,0.75,0.77,0.76,0.83,0.93,0.75,0.97,0.65,0.77,0.87,0.78,0.9,0.94,0.7,0.725 claude-3-5-sonnet-20241022,Private,Normal,Anthropic,3,15,0.801,0.83,0.77,0.68,0.81,0.68,0.78,0.85,0.91,0.92,0.67,0.9,0.75,0.74,0.88,0.69,0.955 @@ -19,4 +24,4 @@ mistral-small-2409,Private,Normal,Mistral,0.2,0.6,0.750,0.82,0.68,0.7,0.77,0.72, ministral-8b-2410,Private,Normal,Mistral,0.1,0.1,0.689,0.73,0.65,0.75,0.59,0.73,0.98,0.66,0.98,0.34,0.78,0.24,0.81,0.9,0.95,0.53,0.41 Meta-Llama-3.1-8B-Instruct-Turbo,Open source,Normal,Meta,0.2,0.2,0.678,0.71,0.64,0.77,0.49,0.44,0.96,0.66,0.98,0.25,0.73,0.48,0.76,0.93,0.96,0.51,0.575 open-mistral-nemo-2407,Open source,Normal,Mistral,0.15,0.15,0.661,0.68,0.64,0.7,0.64,0.51,0.98,0.68,0.99,0.26,0.78,0.21,0.75,0.9,0.94,0.51,0.41 -Dataset Avg,,,,,,,0.84,0.81,0.82,0.81,0.79,0.89,0.82,0.96,0.64,0.82,0.84,0.83,0.93,0.86,0.76,0.82 \ No newline at end of file +Dataset Avg,,,,,,,0.86,0.81,0.82,0.81,0.81,0.90,0.82,0.95,0.68,0.81,0.86,0.82,0.93,0.88,0.76,0.85 \ No newline at end of file