Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
import unicodedata

import gradio as gr
from transformers import pipeline
|
4 |
+
|
5 |
+
# Zero-shot classification pipeline, constructed once at module load.
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
)

# Candidate labels handed to the classifier for every expenditure.
categories = [
    "Saving",
    "Need",
    "Want",
    "Investment",
]
|
10 |
+
|
11 |
+
# Helper function to extract information from input
|
12 |
+
def extract_info(user_input):
    """Strip Vietnamese accents from the input and route it to a classifier.

    Args:
        user_input: Free-text description of an expenditure, with or
            without Vietnamese diacritics.

    Returns:
        Whatever ``classify_saving`` returns when the accent-free text
        contains "gui tiet kiem" (compared case-insensitively), otherwise
        whatever ``classify_other`` returns.
    """
    # Split every accented letter into its base letter plus combining marks,
    # then drop the marks. Deleting the accented letters outright would turn
    # "gửi tiết kiệm" into "gi tit kim" and defeat the keyword rule below.
    decomposed = unicodedata.normalize("NFD", user_input)
    without_marks = "".join(
        ch for ch in decomposed if not unicodedata.combining(ch)
    )
    # Recompose so text in other scripts is not left in decomposed form.
    normalized_input = unicodedata.normalize("NFC", without_marks)
    # "đ"/"Đ" are standalone letters with no decomposition; map them by hand.
    normalized_input = normalized_input.replace("đ", "d").replace("Đ", "D")

    # Compare on a lowercased copy so a capitalised first word still matches;
    # the classifiers receive the accent-free text with its original casing.
    if "gui tiet kiem" in normalized_input.lower():
        return classify_saving(normalized_input)
    return classify_other(normalized_input)
|
22 |
+
|
23 |
+
# Function to classify saving-related expenditure
|
24 |
+
def classify_saving(input_text):
    """Extract the deposit term from a saving-related message.

    Args:
        input_text: Accent-free text; the term is recognised as a run of
            digits followed (optionally after whitespace) by "thang".

    Returns:
        A follow-up question when no term is found, otherwise the string
        "Saving: " followed by the dict of extracted details.
    """
    found = re.search(r'(\d+)\s*thang', input_text)
    if found is None:
        # The term is required, so ask the user for it.
        return "Ban gui tiet kiem bao nhieu thang?"

    details = {'term': found.group(1) + " tháng"}
    return f"Saving: {details}"
|
35 |
+
|
36 |
+
# Function to classify other expenditures
|
37 |
+
def classify_other(input_text):
    """Classify a non-saving expenditure and extract the amount spent.

    Args:
        input_text: Description of the expenditure.

    Returns:
        A follow-up question (str) when the text contains no number.
        Otherwise a dict with keys ``"classification"`` (first label
        returned by the zero-shot classifier), ``"amount"`` (the first
        number found, as written) and ``"sub_category"`` (the whole input,
        lowercased).
    """
    # Check for an amount before running the model: if it is missing we only
    # return a question, so the inference would be wasted work.
    # The pattern accepts digit groups joined by "." or "," so grouped
    # amounts such as "1.500.000" are captured whole, and a sentence-ending
    # period after the number is not swallowed.
    amount_match = re.search(r'\d+(?:[.,]\d+)*', input_text)
    if not amount_match:
        return "Ban chi tieu nay het bao nhieu tien?"

    result = classifier(input_text, candidate_labels=categories)
    # Take the first label as the top classification (the pipeline is
    # expected to return labels ordered by score — confirm against its docs).
    classification = result['labels'][0]

    return {
        "classification": classification,
        "amount": amount_match.group(0),
        "sub_category": input_text.lower(),
    }
|
53 |
+
|
54 |
+
# Define the Gradio interface
|
55 |
+
def process_user_input(user_input):
    """Gradio callback: pass the submitted text to ``extract_info``.

    Args:
        user_input: Text submitted through the interface.

    Returns:
        The result of ``extract_info(user_input)``, unchanged.
    """
    response = extract_info(user_input)
    return response
|
57 |
+
|
58 |
+
# Text-in / text-out web UI wired to the expenditure classifier callback.
iface = gr.Interface(
    title="Expenditure Classification",
    description=(
        "Classify expenditures into Need, Want, Saving, or Investment "
        "based on the 50-30-20 rule. Type in Vietnamese (không dấu)!"
    ),
    fn=process_user_input,
    inputs="text",
    outputs="text",
)

iface.launch()
|