,Model,Model Type,Score,Chat,Chat Hard,Safety,Reasoning,Prior Sets (0.5 weight)
1,"infly/INF-ORM-Llama3.1-70B",Seq. Classifier,95.10529562974679,96.64804469273743,91.00877192982456,93.64864864864865,99.1157172477765,
2,"ShikaiChen/LDL-Reward-Gemma-2-27B-v0.1",Seq. Classifier,94.99413134933042,96.36871508379889,90.78947368421052,93.78378378378379,99.03455284552845,
3,"nicolinho/QRM-Gemma-2-27B",Seq. Classifier,94.43611331484493,96.64804469273743,90.13157894736842,92.70270270270271,98.26212691657118,
4,"Skywork/Skywork-Reward-Gemma-2-27B-v0.2",Seq. Classifier,94.26093621016115,96.08938547486034,89.91228070175438,92.97297297297297,98.0691056910569,
5,"nvidia/Llama-3.1-Nemotron-70B-Reward *",Custom Classifier,94.10897209520822,97.48603351955308,85.74561403508773,95.13513513513513,98.0691056910569,
6,"Skywork/Skywork-Reward-Gemma-2-27B ⚠️",Seq. Classifier,93.80116450605776,95.81005586592178,91.44736842105263,91.89189189189189,96.05534184536477,
7,"SF-Foundation/TextEval-Llama3.1-70B * ⚠️",Generative,93.48032435319458,94.1340782122905,90.13157894736842,93.24324324324324,96.41239700987613,
8,"meta-metrics/MetaMetrics-RM-v1.0",Custom Classifier,93.42462545063005,98.32402234636872,86.40350877192982,90.8108108108108,98.16015987341082,
9,"Skywork/Skywork-Critic-Llama-3.1-70B ⚠️",Generative,93.30801781900792,96.64804469273743,87.93859649122807,93.10810810810811,95.5373219839581,
10,"nicolinho/QRM-Llama3.1-8B-v2",Seq. Classifier,93.13653373860271,96.36871508379889,86.84210526315789,92.56756756756756,96.76774703988652,
11,"Skywork/Skywork-Reward-Llama-3.1-8B-v0.2",Seq. Classifier,93.12997963530022,94.6927374301676,88.37719298245614,92.70270270270271,96.7472854258744,
12,"nicolinho/QRM-Llama3.1-8B ⚠️",Seq. Classifier,93.05891420009982,94.41340782122904,89.69298245614036,92.29729729729729,95.83196922573254,
13,"LxzGordon/URM-LLaMa-3.1-8B ⚠️",Seq. Classifier,92.93773298857982,95.53072625698324,88.15789473684211,91.08108108108108,96.98122987941288,
14,"Salesforce/SFR-LLaMa-3.1-70B-Judge-r *",Generative,92.71833683150776,96.92737430167598,84.75877192982456,91.62162162162163,97.56557947290882,
15,"R-I-S-E/RISE-Judge-Qwen2.5-32B",Generative,92.66088172895866,96.64804469273743,83.33333333333333,91.89189189189189,98.77025699787198,
16,"Skywork/Skywork-Reward-Llama-3.1-8B ⚠️",Seq. Classifier,92.52495013691698,95.81005586592178,87.28070175438596,90.8108108108108,96.19823211654936,
17,"AtlaAI/Selene-1",Generative,92.41086740661206,97.76536312849161,83.99122807017544,92.16216216216216,95.72471626561904,
18,"general-preference/GPM-Llama-3.1-8B ⚠️",Custom Classifier,92.23713029788581,93.29608938547486,88.59649122807018,91.08108108108108,95.97485949691712,
19,"nvidia/Nemotron-4-340B-Reward *",Custom Classifier,91.9958677606516,95.81005586592178,87.06140350877193,91.48648648648648,93.6255251814263,
20,"Ray2333/GRM-Llama3-8B-rewardmodel-ft ⚠️",Seq. Classifier,91.53526049213252,95.53072625698324,86.1842105263158,90.8108108108108,93.61529437442026,
21,"nicolinho/QRM-Llama3-8B ⚠️",Seq. Classifier,91.0990919512119,95.81005586592178,81.14035087719299,89.86486486486487,97.581096196868,
22,"SF-Foundation/TextEval-OffsetBias-12B *",Generative,91.04924182882311,91.89944134078212,86.62280701754386,92.02702702702703,93.64769192993944,
23,"Ray2333/GRM-llama3.2-3B-rewardmodel-ft",Seq. Classifier,90.92295892363056,91.62011173184358,84.86842105263158,92.70270270270271,94.50060020734435,
24,"Salesforce/SFR-nemo-12B-Judge-r *",Generative,90.26551100385808,97.20670391061452,82.23684210526316,86.48648648648648,95.13201151306815,
25,"internlm/internlm2-20b-reward",Seq. Classifier,90.15948083664846,98.88268156424581,76.53508771929825,89.45945945945945,95.76069460359032,
26,"Skywork/Skywork-VL-Reward-7B",Seq. Classifier,90.07022246172819,89.94413407821229,87.5,91.08108108108108,91.75567468761938,
27,"facebook/Self-taught-evaluator-llama3.1-70B *",Generative,90.01358317701886,96.92737430167598,85.08771929824562,89.5945945945946,88.44464451355923,
28,"LxzGordon/URM-LLaMa-3-8B",Seq. Classifier,89.90981543420907,96.92737430167598,78.7280701754386,88.24324324324324,95.74057401647842,
29,"NCSOFT/Llama-3-OffsetBias-RM-8B",Seq. Classifier,89.41975692993036,97.20670391061452,81.79824561403508,86.75675675675676,91.91732143831506,
30,"AtlaAI/Selene-1-Mini-Llama-3.1-8B",Generative,89.12784912886812,93.57541899441341,79.3859649122807,89.25675675675676,94.29325585202162,
31,"Skywork/Skywork-Critic-Llama-3.1-8B",Generative,88.95511699074142,93.57541899441341,81.35964912280701,91.08108108108108,89.80431876466416,
32,"nvidia/Llama3-70B-SteerLM-RM *",Custom Classifier,88.76963582088416,91.34078212290503,80.26315789473684,92.83783783783784,90.63676542805698,
33,"Salesforce/SFR-LLaMa-3.1-8B-Judge-r *",Generative,88.65372403487248,95.53072625698324,77.74122807017544,86.21621621621621,95.12672559611501,
34,"facebook/Self-taught-Llama-3-70B *",Generative,88.62795600264494,96.92737430167598,83.99122807017544,91.08108108108108,82.5121405576472,
35,"RLHFlow/ArmoRM-Llama3-8B-v0.1",Custom Classifier,88.60367185781917,96.92737430167598,76.75438596491227,90.54054054054055,97.34715174332952,74.29414161945574
36,"Ray2333/GRM-gemma2-2B-rewardmodel-ft",Seq. Classifier,88.39250002515702,93.01675977653632,77.19298245614036,92.16216216216216,91.19809570578929,
37,"google/gemini-1.5-pro-0514 *",Generative,88.20069001791948,92.31843575418995,80.59210526315789,87.9054054054054,91.98681364892467,
38,"R-I-S-E/RISE-Judge-Qwen2.5-7B",Generative,88.19099980224239,92.17877094972067,76.53508771929825,87.97297297297297,96.07716756697768,
39,"Cohere May 2024 *",Custom Classifier,88.16038708182192,96.36871508379889,71.2719298245614,92.29729729729729,97.68272221312816,78.20215489882585
40,"google/flame-1.0-24B-july-2024 *",Generative,87.80801832232187,92.17877094972067,75.65789473684211,89.5945945945946,93.80081300813008,
41,"internlm/internlm2-7b-reward",Seq. Classifier,87.59316719911449,99.16201117318435,69.51754385964912,87.16216216216216,94.53095160146232,
42,"ZiyiYe/Con-J-Qwen2-7B ⚠️",Generative,87.12028871485069,91.89944134078212,80.26315789473684,88.24324324324324,88.0753123806406,
43,"google/gemini-1.5-pro-0924",Generative,86.78430992050927,94.1340782122905,76.97368421052632,85.8108108108108,90.21866644840945,
44,"openai/gpt-4o-2024-08-06",Generative,86.72554986675267,96.08938547486034,76.09649122807018,88.10810810810811,86.60821465597208,
45,"RLHFlow/pair-preference-model-LLaMA3-8B",Custom Classifier,85.74792972712865,98.32402234636872,65.78947368421052,89.72972972972973,94.73420363398264,74.57650875557454
46,"Ray2333/GRM-llama3-8B-sftreg",Seq. Classifier,85.42084389305319,98.60335195530726,67.76315789473684,89.1891891891892,92.29347410923774,73.08924874053665
47,"opencompass/CompassJudger-1-32B-Instruct",Generative,85.22047081369766,98.04469273743017,65.13157894736842,85.27027027027027,92.43534129972173,
48,"Cohere March 2024 *",Custom Classifier,85.10802881361649,94.6927374301676,65.13157894736842,87.70270270270271,98.17073170731707,74.57675774743672
49,"Ray2333/GRM-llama3-8B-distill",Seq. Classifier,84.63918882385776,98.32402234636872,68.42105263157895,86.75675675675676,91.3273449009658,72.09434614337957
50,"Ray2333/GRM-Gemma-2B-rewardmodel-ft ⚠️",Seq. Classifier,84.46827345209587,89.3854748603352,75.21929824561404,84.45945945945945,88.80886124297484,
51,"openai/gpt-4-0125-preview",Generative,84.33564801010327,95.25139664804469,74.34210526315789,87.56756756756756,86.9236645386588,70.85136405607162
52,"mattshumer/Reflection-70B",Generative,84.22327632009588,97.48603351955308,70.6140350877193,83.17567567567568,85.61736099743548,
53,"Anthropic/claude-3-5-sonnet-20240620",Generative,84.17242041164789,96.36871508379889,74.01315789473684,81.62162162162163,84.68618704643423,
54,"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo",Generative,84.12067803631126,97.20670391061452,74.56140350877193,77.56756756756756,87.14703715829104,
55,"opencompass/CompassJudger-1-14B-Instruct",Generative,84.09022697921793,97.48603351955308,62.280701754385966,83.91891891891892,92.67525372401374,
56,"meta-llama/Meta-Llama-3.1-70B-Instruct",Generative,84.05217990917473,97.20670391061452,70.17543859649123,82.83783783783784,85.98873929175534,
57,"NCSOFT/Llama-3-OffsetBias-8B",Generative,83.96777752436938,92.45810055865921,80.26315789473684,86.75675675675676,76.39309488732471,
58,"openai/gpt-4-turbo-2024-04-09",Generative,83.95011678629895,95.25139664804469,75.43859649122807,87.56756756756756,82.70345664866045,73.629016365689
59,"sfairXC/FsfairX-LLaMA3-RM-v0.1",Seq. Classifier,83.38339965331156,99.44134078212291,65.13157894736842,86.75675675675676,86.43633709827031,74.91856971076719
60,"openai/gpt-4o-2024-05-13",Generative,83.2681071132992,96.64804469273743,70.39473684210526,86.48648648648648,84.86965951874285,72.61510893954863
61,"opencompass/CompassJudger-1-7B-Instruct",Generative,83.16709323590604,97.76536312849161,60.96491228070175,84.45945945945945,89.47863807497134,
62,"internlm/internlm2-1_8b-reward",Seq. Classifier,82.16733515408055,93.57541899441341,66.2280701754386,81.62162162162163,87.24422982484859,
63,"CIR-AMS/BTRM_Qwen2_7b_0613",Seq. Classifier,81.72269085246006,97.48603351955308,57.23684210526316,90.13513513513513,87.74894963714738,70.2902968779431
64,"openbmb/Eurus-RM-7b",Seq. Classifier,81.58895090730017,98.04469273743017,65.5701754385965,81.35135135135135,86.3251623288045,71.71779445333651
65,"Nexusflow/Starling-RM-34B",Seq. Classifier,81.33351263768401,96.92737430167598,57.23684210526316,87.70270270270271,88.45078299776287,71.36620952434669
66,"google/gemma-2-27b-it",Generative,80.89669003773389,94.83240223463687,59.10087719298246,86.35135135135135,83.30212937196487,
67,"google/gemini-1.5-flash-001",Generative,80.5391103484727,92.17877094972067,63.48684210526316,86.95945945945945,85.1162219675888,69.36940417219024
68,"Ray2333/Gemma-2B-rewardmodel-ft ⚠️",Seq. Classifier,80.47843057507436,77.93296089385476,74.78070175438596,85.27027027027027,83.92978938178643,
69,"allenai/tulu-v2.5-13b-preference-mix-rm",Seq. Classifier,80.26558812003782,93.57541899441341,68.20175438596492,77.29729729729729,88.50261908659355,67.23611355180205
70,"Anthropic/claude-3-opus-20240229",Generative,80.0759036376447,94.6927374301676,60.30701754385965,86.62162162162163,78.68223795492989,
71,"openai/gpt-4o-mini-2024-07-18",Generative,80.06759386119498,94.97206703910615,60.74561403508772,80.8108108108108,83.7418835597752,
72,"weqweasdas/RM-Mistral-7B",Seq. Classifier,79.8233742639417,96.64804469273743,60.526315789473685,87.02702702702703,77.35615485349484,75.29528365000934
73,"NousResearch/Hermes-3-Llama-3.1-70B",Generative,78.47084260833167,96.22905027932961,56.68859649122807,82.29729729729729,78.6684263654717,
74,"hendrydong/Mistral-RM-for-RAFT-GSHF-v0",Seq. Classifier,78.46503174091394,98.32402234636872,57.89473684210526,85.0,74.33602062530693,75.07572604066365
75,"meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",Generative,78.08002309698713,87.56983240223464,66.8859649122807,75.06756756756756,82.79672750586566,
76,"Ray2333/reward-model-Mistral-7B-instruct-Unifie...",Seq. Classifier,76.61192139206588,97.76536312849161,50.6578947368421,85.27027027027027,73.88893435914224,74.3423675391006
77,"Ahjeong/MMPO_Gemma_7b_gamma1.1_epoch3",DPO,76.52088102568138,97.20670391061452,63.37719298245614,76.35135135135135,72.84129972172205,69.13483329884433
78,"stabilityai/stablelm-2-12b-chat",DPO,76.41872322421631,96.64804469273743,55.48245614035088,78.10810810810811,89.44862770775359,48.39403572004667
79,"meta-llama/Meta-Llama-3-70B-Instruct",Generative,76.26515082171642,97.62569832402235,58.88157894736842,72.97297297297297,78.53644895509358,70.3529589965331
80,"allenai/tulu-2-dpo-70b",DPO,76.20735542607979,97.48603351955308,60.526315789473685,84.45945945945945,74.07206580455066,52.778449688644265
81,"gemini-1.5-flash-8b",Generative,76.00524043227317,94.41340782122904,59.86842105263158,73.98648648648648,75.75264636874557,
82,"Ahjeong/MMPO_Gemma_7b",DPO,75.8660587247668,96.92737430167598,61.40350877192982,71.35135135135135,77.55872483221475,68.31261000855747
83,"PoLL/gpt-3.5-turbo-0125_claude-3-sonnet-2024022...",Generative,75.77705517745792,95.25139664804469,54.05701754385965,80.33783783783784,73.46196868008948,
84,"allenai/llama-3-tulu-2-dpo-70b",DPO,74.9612075859509,96.36871508379889,57.45614035087719,74.86486486486487,80.2023653625798,56.86669694931664
85,"NousResearch/Nous-Hermes-2-Mistral-7B-DPO",DPO,74.80880493527766,92.17877094972067,60.526315789473685,82.43243243243244,73.75184154526109,55.500522983723165
86,"Anthropic/claude-3-sonnet-20240229",Generative,74.57545943180953,93.43575418994413,56.578947368421055,81.6891891891892,69.07005374583947,69.63124589949818
87,"mistralai/Mixtral-8x7B-Instruct-v0.1",DPO,74.54632435829336,94.97206703910615,64.03508771929825,72.56756756756756,78.71855731980139,50.330359933093675
88,"prometheus-eval/prometheus-8x7b-v2.0",Generative,74.5095375782243,93.01675977653632,47.14912280701754,80.47297297297297,77.39929475637038,
89,"Ray2333/GRM-Gemma-2B-sftreg",Seq. Classifier,74.50927082674883,95.53072625698324,48.68421052631579,79.32432432432432,76.83949909968898,69.82591702611495
90,"general-preference/GPM-Gemma-2B",Custom Classifier,74.49128373533642,71.50837988826815,69.73684210526316,81.21621621621621,75.50369673159818,
91,"0-hero/Matter-0.1-7B-boost-DPO-preview",DPO,74.47914014376505,91.06145251396649,60.96491228070175,71.35135135135135,83.94718175369673,55.6624654944527
92,"allenai/tulu-v2.5-70b-uf-rm",Seq. Classifier,73.98314832639727,86.59217877094972,71.71052631578948,70.13513513513513,75.70046925301467,57.571715987797305
93,"HuggingFaceH4/zephyr-7b-alpha",DPO,73.92192687696839,91.62011173184358,62.5,76.62162162162163,75.13982102908277,53.534233127619544
94,"upstage/SOLAR-10.7B-Instruct-v1.0",DPO,73.91132026830088,81.56424581005587,68.64035087719299,85.13513513513513,72.51596005892944,49.49049865208112
95,"allenai/tulu-2-dpo-13b",DPO,73.68126195691116,95.81005586592178,58.333333333333336,79.45945945945945,73.22972936105201,49.46620157266727
96,"opencompass/CompassJudger-1-1.5B-Instruct",Generative,73.44238723104029,96.36871508379889,49.23245614035088,78.17567567567568,69.99270202433568,
97,"allenai/llama-3-tulu-2-8b-uf-mean-rm",Seq. Classifier,73.41574916848018,95.25139664804469,59.21052631578947,61.62162162162162,82.1155262727124,64.3436007999852
98,"HuggingFaceH4/starchat2-15b-v0.1",DPO,73.22060109644468,93.85474860335195,55.48245614035088,70.94594594594595,81.58522944289845,55.248649602907626
99,"Ray2333/Gemma-2B-rewardmodel-baseline",Seq. Classifier,72.89758740021966,94.1340782122905,46.92982456140351,78.64864864864865,73.84050853931359,68.97216667866445
100,"Anthropic/claude-3-haiku-20240307",Generative,72.89194286431167,92.73743016759776,51.973684210526315,79.52702702702703,70.60194658154636,66.34730980541012
101,"HuggingFaceH4/zephyr-7b-beta",DPO,72.80507814531524,95.25139664804469,62.719298245614034,65.67567567567568,77.89497735581382,52.16300745754066
102,"allenai/llama-3-tulu-2-dpo-8b",DPO,72.74751270450155,95.25139664804469,53.50877192982456,66.48648648648648,86.63038140448519,50.973541402832126
103,"0-hero/Matter-0.1-7B-DPO-preview",DPO,72.47264404067178,89.3854748603352,57.675438596491226,63.78378378378378,88.54320128771758,53.477999309390405
104,"jondurbin/bagel-dpo-34b-v0.5",DPO,72.15167952196515,93.85474860335195,55.04385964912281,64.45945945945945,88.8907076990233,44.867564875771365
105,"allenai/tulu-2-dpo-7b",DPO,72.11611434356087,97.48603351955308,56.14035087719298,75.27027027027027,71.75717520598025,47.737369346054734
106,"prometheus-eval/prometheus-7b-v2.0",Generative,72.04295178846496,85.47486033519553,49.12280701754386,77.0945945945946,76.4795452065259,
107,"stabilityai/stablelm-zephyr-3b",DPO,71.45809212918405,86.31284916201118,60.08771929824562,74.05405405405405,75.73184372783325,50.74989667836822
108,"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",DPO,71.38329552978793,91.62011173184358,60.526315789473685,81.48648648648648,61.26104927156654,52.66173320935087
109,"ai2/tulu-2-7b-rm-v0-nectar-binarized-700k.json",Seq. Classifier,71.27478404602779,93.57541899441341,40.78947368421053,79.45945945945945,,
110,"berkeley-nest/Starling-RM-7B-alpha",Seq. Classifier,71.13020256724107,98.04469273743017,45.6140350877193,84.45945945945945,57.998444917335085,67.93855870128164
111,"ai2/tulu-2-7b-rm-v0-nectar-binarized-3.8m-check...",Seq. Classifier,70.58403596186601,95.25139664804469,39.473684210526315,77.02702702702703,,
112,"CohereForAI/c4ai-command-r-plus",Generative,70.56998248762835,95.11173184357541,57.56578947368421,59.86486486486486,70.40312789872866,69.23881422694875
113,"ai2/tulu-2-7b-rm-v0-nectar-binarized-3.8m-check...",Seq. Classifier,70.19339171573809,94.97206703910615,37.5,78.10810810810811,,
114,"allenai/llama-3-tulu-2-70b-uf-mean-rm",Seq. Classifier,70.19307792664753,86.31284916201118,56.14035087719298,60.945945945945944,82.68367708844875,59.57205519263016
115,"ai2/tulu-2-7b-rm-v0-nectar-binarized-3.8m-check...",Seq. Classifier,70.07936854820123,93.85474860335195,38.81578947368421,77.56756756756756,,
116,"ai2/tulu-2-7b-rm-v0-nectar-binarized-3.8m-check...",Seq. Classifier,70.03734328271229,94.1340782122905,38.81578947368421,77.16216216216216,,
117,"weqweasdas/RM-Gemma-7B",Seq. Classifier,69.66957334431098,96.92737430167598,49.780701754385966,57.83783783783784,73.62395645768537,70.68641939562845
118,"ai2/tulu-2-7b-rm-v0-nectar-binarized-3.8m-check...",Seq. Classifier,69.44952818151877,93.85474860335195,37.06140350877193,77.43243243243244,,
119,"ai2/tulu-2-7b-rm-v0-nectar-binarized-3.8m-check...",Seq. Classifier,69.2421964746281,94.41340782122904,35.74561403508772,77.56756756756756,,
120,"weqweasdas/RM-Gemma-7B-4096",Seq. Classifier,69.22303170109127,94.97206703910615,50.219298245614034,56.08108108108108,75.10912860806461,70.24413536208964
121,"ai2/tulu-2-7b-rm-v0-nectar-binarized-3.8m-check...",Seq. Classifier,69.04502561956252,94.41340782122904,35.96491228070175,76.75675675675676,,
122,"openbmb/UltraRM-13b",Seq. Classifier,69.02867919901104,96.36871508379889,55.48245614035088,59.86486486486486,62.44270748076608,72.94062565153789
123,"OpenAssistant/oasst-rm-2.1-pythia-1.4b-epoch-2.5",Seq. Classifier,69.00517292135855,88.54748603351955,48.68421052631579,63.108108108108105,77.51882468489114,65.32929758655776
124,"openbmb/Eurus-7b-kto",DPO,68.99912142883106,95.25139664804469,53.728070175438596,60.54054054054054,74.67261417580619,52.606849779819356
125,"ai2/tulu-2-7b-rm-v0-nectar-binarized-3.8m-check...",Seq. Classifier,68.95403268602327,93.85474860335195,37.06140350877193,75.94594594594595,,
126,"Qwen/Qwen1.5-14B-Chat",DPO,68.64045386840729,57.262569832402235,70.17543859649123,71.21621621621621,89.61129753914987,41.23304044714641
127,"ai2/tulu-2-7b-rm-v0-nectar-binarized-3.8m-check...",Seq. Classifier,68.08398077583611,93.01675977653632,35.96491228070175,75.27027027027027,,
128,"RLHFlow/LLaMA3-iterative-DPO-final",DPO,67.82774529803461,83.79888268156425,59.21052631578947,78.64864864864865,61.60650952147105,43.920573347364794
129,"HuggingFaceH4/zephyr-7b-gemma-v0.1",DPO,67.57835885153328,95.81005586592178,49.56140350877193,58.24324324324324,74.63476018988378,51.70630404815817
130,"ai2/tulu-2-7b-rm-v0-nectar-binarized.json",Seq. Classifier,67.55772237983352,91.34078212290503,39.03508771929825,72.29729729729729,,
131,"Qwen/Qwen1.5-7B-Chat",DPO,67.50138253417825,53.63128491620112,69.07894736842105,69.1891891891892,90.41475691602555,42.884086027930344
132,"openbmb/MiniCPM-2B-dpo-fp32",DPO,67.304776500488,89.10614525139665,49.3421052631579,57.2972972972973,82.33378348884159,49.58432590300511
133,"mightbe/Better-PairRM",Custom Classifier,67.29754324103595,95.53072625698324,39.25438596491228,82.02702702702703,49.826076280897034,72.40145810968448
134,"allenai/OLMo-7B-Instruct",DPO,67.27282652187517,89.66480446927375,50.6578947368421,64.86486486486487,71.6763518306324,51.72760689365022
135,"Qwen/Qwen1.5-72B-Chat",DPO,67.23151527906012,62.29050279329609,66.00877192982456,67.56756756756756,85.54352867354177,42.26289558308108
136,"ai2/tulu-2-7b-rm-v0.json",Seq. Classifier,66.54559072450868,93.29608938547486,45.39473684210526,60.945945945945944,,
137,"Qwen/Qwen1.5-MoE-A2.7B-Chat",DPO,66.4408456376338,72.90502793296089,63.1578947368421,62.83783783783784,77.40082937742129,45.364430968579995
138,"RLHFlow/RewardModel-Mistral-7B-for-DPA-v1",Seq. Classifier,66.33145463112653,87.98882681564245,49.780701754385966,70.67567567567568,59.70835379494734,60.675975598835954
139,"stabilityai/stablelm-2-zephyr-1_6b",DPO,65.73535970393974,96.64804469273743,46.71052631578947,60.270270270270274,67.84218639166257,48.67618199453821
140,"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",Generative,65.65164437199641,80.72625698324022,49.780701754385966,63.986486486486484,68.11313226387297,
141,"weqweasdas/RM-Gemma-2B",Seq. Classifier,65.48909618129333,94.41340782122904,40.78947368421053,49.86486486486486,76.37399738091341,66.51837812920436
142,"openai/gpt-3.5-turbo-0125",Generative,65.34011575979856,92.17877094972067,44.51754385964912,65.47297297297297,59.12315163420091,65.4761630050997
143,"allenai/tulu-v2.5-70b-preference-mix-rm",Seq. Classifier,65.15941759094567,77.37430167597765,59.21052631578947,84.86486486486487,41.37508866699405,60.785195271258935
144,"wenbopan/Faro-Yi-9B-DPO",DPO,64.61094996096162,92.17877094972067,53.07017543859649,55.13513513513514,58.392672013968465,63.945042573813076
145,"meta-llama/Meta-Llama-3-8B-Instruct",Generative,64.49786646478918,85.47486033519553,41.55701754385965,67.97297297297297,64.82341627107546,60.82426393689548
146,"ai2/llama-2-chat-ultrafeedback-60k.jsonl",Seq. Classifier,64.3955076805709,94.41340782122904,45.39473684210526,53.37837837837838,,
147,"IDEA-CCNL/Ziya-LLaMA-7B-Reward",Seq. Classifier,63.784551529691385,86.87150837988827,46.05263157894737,64.05405405405405,57.74540295738528,64.61376982667257
148,"PKU-Alignment/beaver-7b-v2.0-reward",Seq. Classifier,63.66172878401215,89.94413407821229,36.40350877192982,60.4054054054054,68.87004146887108,61.70937960727216
149,"stabilityai/stable-code-instruct-3b",DPO,62.1618132126384,57.82122905027933,58.55263157894737,65.54054054054055,75.28271130026737,45.06209397367635
150,"OpenAssistant/oasst-rm-2-pythia-6.9b-epoch-1",Seq. Classifier,61.501047673154666,92.45810055865921,37.280701754385966,54.45945945945946,58.55022644186174,68.01245262965921
151,"OpenAssistant/reward-model-deberta-v3-large-v2",Seq. Classifier,61.25988488574668,89.3854748603352,45.175438596491226,73.37837837837837,38.54968079882141,58.361018703667625
152,"llm-blender/PairRM-hf",Custom Classifier,60.868838250756006,90.22346368715084,52.19298245614035,47.7027027027027,48.983739837398375,69.61376689001952
153,"PKU-Alignment/beaver-7b-v2.0-cost",Seq. Classifier,59.56778097839703,57.262569832402235,45.6140350877193,76.08108108108108,62.111570360670044,53.97151608182796
154,"ContextualAI/archangel_sft-kto_llama13b",DPO,59.52205456101889,84.07821229050279,37.719298245614034,46.486486486486484,70.76683308779397,57.5968308283755
155,"ContextualAI/archangel_sft-kto_llama30b",DPO,59.00687538053444,84.35754189944134,40.57017543859649,60.54054054054054,50.75435150324658,58.616659661160035
156,"Qwen/Qwen1.5-1.8B-Chat",DPO,58.89567615638699,56.14525139664804,60.30701754385965,48.37837837837838,77.93283134173623,44.53412808623833
157,"ai2/llama-2-chat-7b-nectar-3.8m.json",Seq. Classifier,58.426789771247286,86.31284916201118,26.535087719298247,62.432432432432435,,
158,"PKU-Alignment/beaver-7b-v1.0-cost",Seq. Classifier,57.97567401900532,61.73184357541899,42.324561403508774,73.51351351351352,54.82109728815409,56.999034609857176
159,"ContextualAI/archangel_sft-dpo_llama30b",DPO,56.18285201407361,69.27374301675978,44.73684210526316,62.83783783783784,47.449118786489876,57.0505846339612
160,"ContextualAI/archangel_sft-kto_pythia1-4b",DPO,55.809930200702766,68.43575418994413,37.93859649122807,52.567567567567565,64.47488677906914,55.455761750707126
161,"ContextualAI/archangel_sft-kto_pythia6-9b",DPO,55.6117865296703,77.6536312849162,36.18421052631579,53.648648648648646,54.153707644459004,57.22568255835343
162,"ContextualAI/archangel_sft-kto_pythia2-8b",DPO,54.96592159422631,75.69832402234637,34.21052631578947,47.432432432432435,62.1572679652971,55.69619287630597
163,"Qwen/Qwen1.5-4B-Chat",DPO,54.77003940637828,38.8268156424581,62.719298245614034,55.67567567567568,66.89344955530092,44.69987641930703
164,"ContextualAI/archangel_sft-dpo_llama13b",DPO,53.99846978252061,71.22905027932961,42.98245614035088,56.486486486486484,44.013272766955865,56.56369669643977
165,"ContextualAI/archangel_sft-kto_llama7b",DPO,53.883046644273705,55.865921787709496,43.64035087719298,45.67567567567568,69.41432040159329,55.754882314120465
166,"ContextualAI/archangel_sft-dpo_llama7b",DPO,53.036829672694374,57.82122905027933,44.51754385964912,52.027027027027025,56.58147814699623,55.43691088634592
167,"Qwen/Qwen1.5-0.5B-Chat",DPO,52.982802188122534,35.47486033519553,62.93859649122807,57.027027027027025,59.83862607082447,46.28699984455265
168,"ContextualAI/archangel_sft-dpo_pythia2-8b",DPO,52.857927047782155,80.72625698324022,33.55263157894737,44.729729729729726,51.34671522889725,55.0106763884103
169,"my_model/",Seq. Classifier,52.672491797862534,45.53072625698324,55.921052631578945,43.91891891891892,65.319269383969,
170,"ContextualAI/archangel_sft-dpo_pythia6-9b",DPO,52.6326255248281,74.86033519553072,34.21052631578947,51.75675675675676,48.470153325694326,55.09808653591037
171,"ai2/llama-2-chat-nectar-180k.json",Seq. Classifier,52.34906620822528,88.26815642458101,28.50877192982456,40.270270270270274,,
172,"ContextualAI/archangel_sft-dpo_pythia1-4b",DPO,52.334628884533196,63.96648044692738,37.280701754385966,50.4054054054054,56.71652479947619,54.27343514840888
173,"stanfordnlp/SteamSHP-flan-t5-xl",Custom Classifier,51.34535042343637,85.47486033519553,36.8421052631579,37.83783783783784,38.41156490423965,64.97541713006551
174,"SultanR/SmolTulu-1.7b-RM",Seq. Classifier,50.93872947030961,74.30167597765363,44.078947368421055,57.16216216216216,28.212132373001584,
175,"ContextualAI/archangel_sft-kto_pythia12-0b",DPO,50.52988550561952,74.86033519553072,36.18421052631579,47.567567567567565,41.27175751623288,55.001227939281776
176,"weqweasdas/hh_rlhf_rm_open_llama_3b",Seq. Classifier,50.274817067272814,81.84357541899442,37.280701754385966,41.486486486486484,32.80815190702243,65.63552247167672
177,"ContextualAI/archangel_sft-dpo_pythia12-0b",DPO,50.08791349970499,66.75977653631286,36.40350877192982,54.32432432432432,41.39384514650516,53.02831193920059
178,"random",,50.0,50.0,50.0,50.0,50.0,50.0
179,"stanfordnlp/SteamSHP-flan-t5-large",Custom Classifier,49.62050475651485,85.75418994413408,33.1140350877193,37.432432432432435,35.62673923719103,62.72974940567991
180,"allenai/tulu-v2.5-13b-uf-rm",Seq. Classifier,48.05551076423311,39.385474860335194,42.324561403508774,55.54054054054054,47.36897746494243,63.26048833944414
181,"PKU-Alignment/beaver-7b-v1.0-reward",Seq. Classifier,47.26664990676508,81.84357541899442,28.728070175438596,37.567567567567565,34.596155944780925,59.929110947322734
182,"allenai/Llama-3.1-70B-Instruct-RM-RB2",Seq. Classifier,90.20891847250666,96.64804469273743,83.55263157894737,90.94594594594595,89.68905167239592,0.0
183,"allenai/Llama-3.1-8B-Instruct-RM-RB2",Seq. Classifier,88.85411761564486,95.81005586592178,81.57894736842105,89.32432432432432,88.70314290391227,0.0
184,"allenai/Llama-3.1-8B-Base-RM-RB2",Seq. Classifier,84.63022615056406,93.29608938547486,77.85087719298247,88.51351351351352,78.86042451028537,0.0
185,"allenai/Llama-3.1-Tulu-3-8B-SFT-RM-RB2",Seq. Classifier,85.5067097271751,94.97206703910615,79.16666666666666,87.83783783783784,80.05026736508975,0.0
186,"allenai/Llama-3.1-Tulu-3-8B-DPO-RM-RB2",Seq. Classifier,84.30783781180817,95.53072625698324,76.09649122807018,86.62162162162163,78.98251214055765,0.0
187,"allenai/Llama-3.1-Tulu-3-8B-RL-RM-RB2",Seq. Classifier,83.68729455170623,94.6927374301676,75.87719298245614,87.02702702702703,77.15222076717411,0.0
188,"allenai/Llama-3.1-Tulu-3-70B-SFT-RM-RB2",Seq. Classifier,88.9245750153865,96.92737430167597,82.67543859649122,90.27027027027027,85.82521689310852,0.0