allenpark committed on
Commit
e3e882a
·
1 Parent(s): 45f3b77

feat: add new examples

Browse files
Files changed (1) hide show
  1. app.py +76 -8
app.py CHANGED
@@ -48,15 +48,83 @@ Your output must in the following format:
48
 
49
  EXAMPLES = [
50
  {
51
- "emoji": "🌁",
52
- "model_output": "The sky is green.",
53
- "user_input": "What color is the sky?",
54
- "gold_answer": "",
55
- "retrieved_context": "The sky is blue.",
56
- "pass_criteria": "Is the MODEL OUTPUT grounded in the CONTEXT?",
57
- "rubric": "0. The pass criteria is not satisfied and not accurately followed\n1. The pass criteria is satisfied and accurately followed",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  }
59
-
60
  ]
61
 
62
  HEADER = """
 
48
 
49
  EXAMPLES = [
50
  {
51
+ "emoji": "💊",
52
+ "model_output": "Metformin works by reducing glucose production in the liver and improving insulin sensitivity.",
53
+ "user_input": "How does metformin work to treat diabetes?",
54
+ "retrieved_context": "Metformin reduces hepatic glucose production, decreases intestinal glucose absorption, and improves insulin sensitivity by increasing peripheral glucose uptake.",
55
+ "pass_criteria": "Does the MODEL OUTPUT explain the mechanism of action accurately and completely?",
56
+ "rubric": "1. Incorrect mechanism\n2. Partially correct but missing key elements\n3. Mostly correct with minor omissions\n4. Fully correct and comprehensive"
57
+ },
58
+ {
59
+ "emoji": "📈",
60
+ "model_output": "A bull market is characterized by rising stock prices over a sustained period.",
61
+ "user_input": "What is a bull market?",
62
+ "gold_answer": "A bull market is a financial market condition where prices are rising or expected to rise, typically defined by a 20% rise from recent lows.",
63
+ "pass_criteria": "Does the MODEL OUTPUT provide a complete and accurate definition?",
64
+ "rubric": "1. Incorrect or misleading\n2. Basic but incomplete\n3. Accurate but missing technical details\n4. Complete with technical specifics\n5. Comprehensive with market context"
65
+ },
66
+ {
67
+ "emoji": "🫀",
68
+ "model_output": "Hypertension is diagnosed when blood pressure consistently exceeds 130/80 mmHg.",
69
+ "user_input": "What are the diagnostic criteria for hypertension?",
70
+ "retrieved_context": "Stage 1 hypertension: systolic 130-139 or diastolic 80-89 mmHg. Stage 2: systolic ≥140 or diastolic ≥90 mmHg.",
71
+ "pass_criteria": "Does the MODEL OUTPUT accurately reflect current diagnostic guidelines?",
72
+ "rubric": "1. Incorrect values\n2. Partially correct but imprecise\n3. Correct but missing staging\n4. Complete with staging information\n5. Comprehensive with risk factors"
73
+ },
74
+ {
75
+ "emoji": "💰",
76
+ "model_output": "ETFs are investment funds traded on stock exchanges, offering diversification and lower fees than mutual funds.",
77
+ "user_input": "What are ETFs and their advantages?",
78
+ "pass_criteria": "Does the MODEL OUTPUT explain both the concept and benefits accurately?",
79
+ "rubric": "1. Incorrect explanation\n2. Basic definition only\n3. Correct with some advantages\n4. Complete with multiple advantages"
80
+ },
81
+ {
82
+ "emoji": "🏥",
83
+ "model_output": "MRSA is resistant to methicillin and most beta-lactam antibiotics.",
84
+ "user_input": "What is MRSA?",
85
+ "retrieved_context": "MRSA (Methicillin-resistant Staphylococcus aureus) is a bacteria resistant to many antibiotics. It can cause skin infections, pneumonia, and bloodstream infections.",
86
+ "pass_criteria": "Does the MODEL OUTPUT explain both resistance and clinical significance?",
87
+ "rubric": "1. Incorrect information\n2. Only mentions resistance\n3. Correct but incomplete clinical picture\n4. Complete with resistance and clinical aspects\n5. Comprehensive with treatment options"
88
+ },
89
+ {
90
+ "emoji": "📊",
91
+ "model_output": "Diversification reduces risk by spreading investments across different asset classes, sectors, and geographical regions.",
92
+ "user_input": "What is diversification in investing?",
93
+ "gold_answer": "Diversification is a risk management strategy that mixes various investments within a portfolio to reduce exposure to any single asset or risk.",
94
+ "pass_criteria": "Does the MODEL OUTPUT explain both the concept and purpose of diversification?",
95
+ "rubric": "1. Incorrect concept\n2. Basic definition only\n3. Explains concept with limited context\n4. Complete with risk management aspects\n5. Comprehensive with practical examples"
96
+ },
97
+ {
98
+ "emoji": "🧬",
99
+ "model_output": "Type 2 diabetes involves insulin resistance and decreased insulin production.",
100
+ "user_input": "What causes Type 2 diabetes?",
101
+ "retrieved_context": "Type 2 diabetes develops when the body becomes resistant to insulin or the pancreas doesn't produce enough insulin. Risk factors include obesity, physical inactivity, and genetics.",
102
+ "pass_criteria": "Does the MODEL OUTPUT explain both pathophysiology and risk factors?",
103
+ "rubric": "1. Incorrect pathophysiology\n2. Basic mechanism only\n3. Correct mechanism with partial risk factors\n4. Complete with risk factors\n5. Comprehensive with prevention strategies"
104
+ },
105
+ {
106
+ "emoji": "💵",
107
+ "model_output": "A mortgage amortization schedule shows monthly payments divided between principal and interest over the loan term.",
108
+ "user_input": "What is mortgage amortization?",
109
+ "pass_criteria": "Does the MODEL OUTPUT explain the concept and components clearly?",
110
+ "rubric": "1. Incorrect explanation\n2. Basic definition only\n3. Explains components without context\n4. Complete with payment breakdown\n5. Comprehensive with practical implications"
111
+ },
112
+ {
113
+ "emoji": "🔬",
114
+ "model_output": "Statins work by inhibiting HMG-CoA reductase, reducing cholesterol synthesis in the liver.",
115
+ "user_input": "How do statins lower cholesterol?",
116
+ "retrieved_context": "Statins block HMG-CoA reductase enzyme, reducing liver cholesterol production and increasing LDL receptor expression, leading to lower blood cholesterol.",
117
+ "pass_criteria": "Does the MODEL OUTPUT explain the mechanism accurately?",
118
+ "rubric": "1. Incorrect mechanism\n2. Partial mechanism only\n3. Correct mechanism without effects\n4. Complete with effects\n5. Comprehensive with clinical benefits"
119
+ },
120
+ {
121
+ "emoji": "📉",
122
+ "model_output": "A bear market occurs when stock prices fall 20% or more from recent highs.",
123
+ "user_input": "What defines a bear market?",
124
+ "gold_answer": "A bear market is defined by a prolonged drop in investment prices, typically a 20% or more decline from recent highs, accompanied by widespread pessimism.",
125
+ "pass_criteria": "Does the MODEL OUTPUT provide technical criteria and market sentiment?",
126
+ "rubric": "1. Incorrect definition\n2. Technical criteria only\n3. Correct with partial context\n4. Complete with market sentiment\n5. Comprehensive with historical context"
127
  }
 
128
  ]
129
 
130
  HEADER = """