Meomap committed on
Commit
cb46b54
·
verified ·
1 Parent(s): 8673938

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -0
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import re
4
+
5
# Zero-shot classification pipeline, created once when the module is loaded.
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
)

# Candidate labels handed to the zero-shot classifier.
categories = [
    "Saving",
    "Need",
    "Want",
    "Investment",
]
10
+
11
+ # Helper function to extract information from input
12
def extract_info(user_input):
    """Normalize Vietnamese text and route it to the matching classifier.

    Diacritics are folded to their base letters (for example "gửi tiết kiệm"
    becomes "gui tiet kiem") so the keyword rule below works whether or not
    the user typed accents.

    Args:
        user_input: Raw text typed by the user. ``None`` is treated as an
            empty string.

    Returns:
        The value returned by ``classify_saving`` when the text mentions
        "gui tiet kiem" (in any letter case), otherwise the value returned
        by ``classify_other``.
    """
    # Function-scope import keeps this block self-contained.
    import unicodedata

    text = user_input or ""

    # "đ"/"Đ" have no canonical decomposition, so NFD leaves them untouched;
    # map them to plain "d"/"D" explicitly.
    text = text.replace("đ", "d").replace("Đ", "D")

    # Split every accented letter into base letter + combining marks and
    # drop only the marks. Deleting the whole accented letter would turn
    # "gửi" into "gi" and the keyword rule could never match.
    decomposed = unicodedata.normalize("NFD", text)
    normalized_input = "".join(
        ch for ch in decomposed if not unicodedata.combining(ch)
    )

    # Predefined rule: savings deposits are recognised by keyword,
    # regardless of capitalisation.
    if "gui tiet kiem" in normalized_input.lower():
        return classify_saving(normalized_input)
    return classify_other(normalized_input)
22
+
23
+ # Function to classify saving-related expenditure
24
def classify_saving(input_text):
    """Extract the deposit term from a savings-related message.

    Args:
        input_text: User text that mentions a savings deposit.

    Returns:
        ``"Saving: {'term': '<n> tháng'}"`` when a "<n> thang" / "<n> tháng"
        term is found, otherwise a follow-up question asking for the term.
    """
    # Accept "thang" and precomposed "tháng" in any letter case so the term
    # is found whether or not the caller stripped accents or lower-cased.
    term_match = re.search(r'(\d+)\s*th[aá]ng', input_text, re.IGNORECASE)

    # Ask for the missing field instead of returning an incomplete record.
    if term_match is None:
        return "Ban gui tiet kiem bao nhieu thang?"

    details = {'term': term_match.group(1) + " tháng"}
    return "Saving: {}".format(details)
35
+
36
+ # Function to classify other expenditures
37
def classify_other(input_text):
    """Classify a non-savings expenditure and pull out its amount.

    Args:
        input_text: User text describing an expenditure.

    Returns:
        A follow-up question (``str``) when no number is present in the
        text; otherwise a ``dict`` with the keys ``"classification"`` (first
        label returned by the zero-shot classifier), ``"amount"`` (the
        matched number as text) and ``"sub_category"`` (the lower-cased
        input).
    """
    # Check for an amount first: the regex is cheap, and when the amount is
    # missing we only return a question, so running the model would be
    # wasted work (and empty input never reaches the model).
    # The pattern keeps "."/"," separated digit groups together so that
    # "1.500.000" is captured whole rather than truncated to "1.500".
    amount_match = re.search(r'\d+(?:[.,]\d+)*', input_text)
    if not amount_match:
        return "Ban chi tieu nay het bao nhieu tien?"

    result = classifier(input_text, candidate_labels=categories)
    classification = result['labels'][0]  # Take the top classification

    return {
        "classification": classification,
        "amount": amount_match.group(0),
        "sub_category": input_text.lower(),
    }
53
+
54
+ # Define the Gradio interface
55
def process_user_input(user_input):
    """Gradio callback: pass the submitted text to ``extract_info``.

    Args:
        user_input: Text received from the interface's input box.

    Returns:
        The value produced by ``extract_info`` for that text.
    """
    reply = extract_info(user_input)
    return reply
57
+
58
# Text-in / text-out Gradio UI that calls process_user_input on submit.
iface = gr.Interface(
    process_user_input,
    "text",
    "text",
    title="Expenditure Classification",
    description="Classify expenditures into Need, Want, Saving, or Investment based on the 50-30-20 rule. Type in Vietnamese (không dấu)!",
)

# Start serving the interface.
iface.launch()