Spaces:
Sleeping
Sleeping
feat: add new examples
Browse files
app.py
CHANGED
@@ -48,15 +48,83 @@ Your output must in the following format:
|
|
48 |
|
49 |
EXAMPLES = [
|
50 |
{
|
51 |
-
"emoji": "
|
52 |
-
"model_output": "
|
53 |
-
"user_input": "
|
54 |
-
"
|
55 |
-
"
|
56 |
-
"
|
57 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
}
|
59 |
-
|
60 |
]
|
61 |
|
62 |
HEADER = """
|
|
|
48 |
|
49 |
EXAMPLES = [
|
50 |
{
|
51 |
+
"emoji": "π",
|
52 |
+
"model_output": "Metformin works by reducing glucose production in the liver and improving insulin sensitivity.",
|
53 |
+
"user_input": "How does metformin work to treat diabetes?",
|
54 |
+
"retrieved_context": "Metformin reduces hepatic glucose production, decreases intestinal glucose absorption, and improves insulin sensitivity by increasing peripheral glucose uptake.",
|
55 |
+
"pass_criteria": "Does the MODEL OUTPUT explain the mechanism of action accurately and completely?",
|
56 |
+
"rubric": "1. Incorrect mechanism\n2. Partially correct but missing key elements\n3. Mostly correct with minor omissions\n4. Fully correct and comprehensive"
|
57 |
+
},
|
58 |
+
{
|
59 |
+
"emoji": "π",
|
60 |
+
"model_output": "A bull market is characterized by rising stock prices over a sustained period.",
|
61 |
+
"user_input": "What is a bull market?",
|
62 |
+
"gold_answer": "A bull market is a financial market condition where prices are rising or expected to rise, typically defined by a 20% rise from recent lows.",
|
63 |
+
"pass_criteria": "Does the MODEL OUTPUT provide a complete and accurate definition?",
|
64 |
+
"rubric": "1. Incorrect or misleading\n2. Basic but incomplete\n3. Accurate but missing technical details\n4. Complete with technical specifics\n5. Comprehensive with market context"
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"emoji": "🫀",
|
68 |
+
"model_output": "Hypertension is diagnosed when blood pressure consistently exceeds 130/80 mmHg.",
|
69 |
+
"user_input": "What are the diagnostic criteria for hypertension?",
|
70 |
+
"retrieved_context": "Stage 1 hypertension: systolic 130-139 or diastolic 80-89 mmHg. Stage 2: systolic ≥140 or diastolic ≥90 mmHg.",
|
71 |
+
"pass_criteria": "Does the MODEL OUTPUT accurately reflect current diagnostic guidelines?",
|
72 |
+
"rubric": "1. Incorrect values\n2. Partially correct but imprecise\n3. Correct but missing staging\n4. Complete with staging information\n5. Comprehensive with risk factors"
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"emoji": "💰",
|
76 |
+
"model_output": "ETFs are investment funds traded on stock exchanges, offering diversification and lower fees than mutual funds.",
|
77 |
+
"user_input": "What are ETFs and their advantages?",
|
78 |
+
"pass_criteria": "Does the MODEL OUTPUT explain both the concept and benefits accurately?",
|
79 |
+
"rubric": "1. Incorrect explanation\n2. Basic definition only\n3. Correct with some advantages\n4. Complete with multiple advantages"
|
80 |
+
},
|
81 |
+
{
|
82 |
+
"emoji": "🏥",
|
83 |
+
"model_output": "MRSA is resistant to methicillin and most beta-lactam antibiotics.",
|
84 |
+
"user_input": "What is MRSA?",
|
85 |
+
"retrieved_context": "MRSA (Methicillin-resistant Staphylococcus aureus) is a bacteria resistant to many antibiotics. It can cause skin infections, pneumonia, and bloodstream infections.",
|
86 |
+
"pass_criteria": "Does the MODEL OUTPUT explain both resistance and clinical significance?",
|
87 |
+
"rubric": "1. Incorrect information\n2. Only mentions resistance\n3. Correct but incomplete clinical picture\n4. Complete with resistance and clinical aspects\n5. Comprehensive with treatment options"
|
88 |
+
},
|
89 |
+
{
|
90 |
+
"emoji": "π",
|
91 |
+
"model_output": "Diversification reduces risk by spreading investments across different asset classes, sectors, and geographical regions.",
|
92 |
+
"user_input": "What is diversification in investing?",
|
93 |
+
"gold_answer": "Diversification is a risk management strategy that mixes various investments within a portfolio to reduce exposure to any single asset or risk.",
|
94 |
+
"pass_criteria": "Does the MODEL OUTPUT explain both the concept and purpose of diversification?",
|
95 |
+
"rubric": "1. Incorrect concept\n2. Basic definition only\n3. Explains concept with limited context\n4. Complete with risk management aspects\n5. Comprehensive with practical examples"
|
96 |
+
},
|
97 |
+
{
|
98 |
+
"emoji": "🧬",
|
99 |
+
"model_output": "Type 2 diabetes involves insulin resistance and decreased insulin production.",
|
100 |
+
"user_input": "What causes Type 2 diabetes?",
|
101 |
+
"retrieved_context": "Type 2 diabetes develops when the body becomes resistant to insulin or the pancreas doesn't produce enough insulin. Risk factors include obesity, physical inactivity, and genetics.",
|
102 |
+
"pass_criteria": "Does the MODEL OUTPUT explain both pathophysiology and risk factors?",
|
103 |
+
"rubric": "1. Incorrect pathophysiology\n2. Basic mechanism only\n3. Correct mechanism with partial risk factors\n4. Complete with risk factors\n5. Comprehensive with prevention strategies"
|
104 |
+
},
|
105 |
+
{
|
106 |
+
"emoji": "💵",
|
107 |
+
"model_output": "A mortgage amortization schedule shows monthly payments divided between principal and interest over the loan term.",
|
108 |
+
"user_input": "What is mortgage amortization?",
|
109 |
+
"pass_criteria": "Does the MODEL OUTPUT explain the concept and components clearly?",
|
110 |
+
"rubric": "1. Incorrect explanation\n2. Basic definition only\n3. Explains components without context\n4. Complete with payment breakdown\n5. Comprehensive with practical implications"
|
111 |
+
},
|
112 |
+
{
|
113 |
+
"emoji": "🔬",
|
114 |
+
"model_output": "Statins work by inhibiting HMG-CoA reductase, reducing cholesterol synthesis in the liver.",
|
115 |
+
"user_input": "How do statins lower cholesterol?",
|
116 |
+
"retrieved_context": "Statins block HMG-CoA reductase enzyme, reducing liver cholesterol production and increasing LDL receptor expression, leading to lower blood cholesterol.",
|
117 |
+
"pass_criteria": "Does the MODEL OUTPUT explain the mechanism accurately?",
|
118 |
+
"rubric": "1. Incorrect mechanism\n2. Partial mechanism only\n3. Correct mechanism without effects\n4. Complete with effects\n5. Comprehensive with clinical benefits"
|
119 |
+
},
|
120 |
+
{
|
121 |
+
"emoji": "π",
|
122 |
+
"model_output": "A bear market occurs when stock prices fall 20% or more from recent highs.",
|
123 |
+
"user_input": "What defines a bear market?",
|
124 |
+
"gold_answer": "A bear market is defined by a prolonged drop in investment prices, typically a 20% or more decline from recent highs, accompanied by widespread pessimism.",
|
125 |
+
"pass_criteria": "Does the MODEL OUTPUT provide technical criteria and market sentiment?",
|
126 |
+
"rubric": "1. Incorrect definition\n2. Technical criteria only\n3. Correct with partial context\n4. Complete with market sentiment\n5. Comprehensive with historical context"
|
127 |
}
|
|
|
128 |
]
|
129 |
|
130 |
HEADER = """
|