Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -206,7 +206,7 @@ markdown_text = """
|
|
| 206 |
markdown_text_file_upload = """
|
| 207 |
- Trained with ~1500 rows of data on bert-base-uncased, English.
|
| 208 |
- Upload CSV ONLY and name your column with budget line item as **text**.
|
| 209 |
-
-
|
| 210 |
- Accuracy of the model is ~88%.
|
| 211 |
"""
|
| 212 |
html_table = """
|
|
@@ -226,50 +226,20 @@ html_table = """
|
|
| 226 |
input for subsequent second-level classification. The project
|
| 227 |
is conducted with an exclusive focus on academic and research
|
| 228 |
objectives.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
Detailed metrics of the training process are as follows:
|
| 230 |
<code>TrainOutput(global_step=395, training_loss=1.1497593360611156,
|
| 231 |
metrics={'train_runtime': 650.0119, 'train_samples_per_second':
|
| 232 |
9.638, 'train_steps_per_second': 0.608, 'total_flos': 1648509163714560.0,
|
| 233 |
'train_loss': 1.1497593360611156, 'epoch': 5.0})</code>.
|
| 234 |
</p>
|
| 235 |
-
<table style="margin-left: auto; margin-right: auto;">
|
| 236 |
-
<tr>
|
| 237 |
-
<th>Epoch</th>
|
| 238 |
-
<th>Training Loss</th>
|
| 239 |
-
<th>Validation Loss</th>
|
| 240 |
-
<th>Accuracy</th>
|
| 241 |
-
</tr>
|
| 242 |
-
<tr>
|
| 243 |
-
<td>1</td>
|
| 244 |
-
<td>No log</td>
|
| 245 |
-
<td>2.095209</td>
|
| 246 |
-
<td>0.340764</td>
|
| 247 |
-
</tr>
|
| 248 |
-
<tr>
|
| 249 |
-
<td>2</td>
|
| 250 |
-
<td>No log</td>
|
| 251 |
-
<td>1.419945</td>
|
| 252 |
-
<td>0.662420</td>
|
| 253 |
-
</tr>
|
| 254 |
-
<tr>
|
| 255 |
-
<td>3</td>
|
| 256 |
-
<td>No log</td>
|
| 257 |
-
<td>0.683810</td>
|
| 258 |
-
<td>0.850318</td>
|
| 259 |
-
</tr>
|
| 260 |
-
<tr>
|
| 261 |
-
<td>4</td>
|
| 262 |
-
<td>No log</td>
|
| 263 |
-
<td>0.460408</td>
|
| 264 |
-
<td>0.872611</td>
|
| 265 |
-
</tr>
|
| 266 |
-
<tr>
|
| 267 |
-
<td>5</td>
|
| 268 |
-
<td>No log</td>
|
| 269 |
-
<td>0.422096</td>
|
| 270 |
-
<td>0.888535</td>
|
| 271 |
-
</tr>
|
| 272 |
-
</table>
|
| 273 |
</div>
|
| 274 |
"""
|
| 275 |
# First interface for single line input
|
|
@@ -277,7 +247,7 @@ iface1 = gr.Interface(
|
|
| 277 |
fn=predict,
|
| 278 |
inputs=gr.components.Textbox(lines=1, placeholder="Enter Budget line here...", label="Budget Input"),
|
| 279 |
outputs=gr.components.Label(label="Classification Output"),
|
| 280 |
-
title="COFOG AutoClassification",
|
| 281 |
description=markdown_text,
|
| 282 |
article=html_table,
|
| 283 |
allow_flagging="manual", # Enables flagging
|
|
@@ -294,7 +264,7 @@ iface2 = gr.Interface(
|
|
| 294 |
outputs=gr.components.DataFrame(label="Classification Results"),
|
| 295 |
description=markdown_text_file_upload,
|
| 296 |
article=html_table,
|
| 297 |
-
title="Batch Classification"
|
| 298 |
)
|
| 299 |
|
| 300 |
# Combine the interfaces in a tabbed interface
|
|
|
|
| 206 |
markdown_text_file_upload = """
|
| 207 |
- Trained with ~1500 rows of data on bert-base-uncased, English.
|
| 208 |
- Upload CSV ONLY and name your column with budget line item as **text**.
|
| 209 |
+
- Using a RAG (Retrieval-Augmented Generation) approach to feed context into the classifier using preceding lines of the budget.
|
| 210 |
- Accuracy of the model is ~88%.
|
| 211 |
"""
|
| 212 |
html_table = """
|
|
|
|
| 226 |
input for subsequent second-level classification. The project
|
| 227 |
is conducted with an exclusive focus on academic and research
|
| 228 |
objectives.
|
| 229 |
+
|
| 230 |
+
For batch prediction we integrated a Retrieval-Augmented Generation (RAG)
|
| 231 |
+
approach. This approach enriches the prediction process
|
| 232 |
+
by incorporating contextual information from up to 5 preceding
|
| 233 |
+
lines in the dataset, significantly enhancing the model's
|
| 234 |
+
ability to understand and classify each entry in the context
|
| 235 |
+
of related data.
|
| 236 |
+
|
| 237 |
Detailed metrics of the training process are as follows:
|
| 238 |
<code>TrainOutput(global_step=395, training_loss=1.1497593360611156,
|
| 239 |
metrics={'train_runtime': 650.0119, 'train_samples_per_second':
|
| 240 |
9.638, 'train_steps_per_second': 0.608, 'total_flos': 1648509163714560.0,
|
| 241 |
'train_loss': 1.1497593360611156, 'epoch': 5.0})</code>.
|
| 242 |
</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
</div>
|
| 244 |
"""
|
| 245 |
# First interface for single line input
|
|
|
|
| 247 |
fn=predict,
|
| 248 |
inputs=gr.components.Textbox(lines=1, placeholder="Enter Budget line here...", label="Budget Input"),
|
| 249 |
outputs=gr.components.Label(label="Classification Output"),
|
| 250 |
+
title="COFOG AutoClassification - Single Line",
|
| 251 |
description=markdown_text,
|
| 252 |
article=html_table,
|
| 253 |
allow_flagging="manual", # Enables flagging
|
|
|
|
| 264 |
outputs=gr.components.DataFrame(label="Classification Results"),
|
| 265 |
description=markdown_text_file_upload,
|
| 266 |
article=html_table,
|
| 267 |
+
title="COFOG AutoClassification - Batch Classification"
|
| 268 |
)
|
| 269 |
|
| 270 |
# Combine the interfaces in a tabbed interface
|