Meomap committed on
Commit
f57325a
·
verified ·
1 Parent(s): 19f103a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -34
app.py CHANGED
@@ -1,35 +1,39 @@
1
  import re
 
2
  import gradio as gr
3
 
4
- # Define main and subcategories with keywords for fast matching
 
 
 
5
  CATEGORIES = {
6
  "Need": {
7
- "Utilities": ["electricity", "water", "gas", "internet", "phone"],
8
- "Housing": ["rent", "mortgage", "property tax", "maintenance"],
9
- "Groceries": ["food", "beverages", "supermarket", "groceries"],
10
- "Transportation": ["gasoline", "car payment", "public transit", "vehicle maintenance"],
11
- "Education": ["tuition", "books", "school", "course"],
12
- "Medical": ["health insurance", "doctor", "prescription", "medicine"],
13
- "Insurance": ["home insurance", "auto insurance", "life insurance"],
14
- "Childcare": ["daycare", "babysitter", "school fee"],
15
  },
16
  "Want": {
17
- "Dining Out": ["restaurant", "cafe", "fast food", "delivery"],
18
- "Entertainment": ["movie", "concert", "streaming", "game", "doll", "toy"],
19
- "Travel": ["vacation", "flight", "hotel", "recreation"],
20
- "Fitness": ["gym", "yoga", "sports", "fitness"],
21
- "Shopping": ["clothing", "accessories", "gadget", "luxury"],
22
- "Hobbies": ["art", "crafts", "sports gear", "book", "collectibles"],
23
- "Personal Care": ["spa", "beauty", "haircut", "salon"],
24
  },
25
  "Saving/Investment": {
26
- "Emergency Fund": ["emergency fund"],
27
- "Retirement": ["retirement", "pension"],
28
- "Investments": ["stocks", "bonds", "real estate", "crypto"],
29
- "Debt Repayment": ["loan repayment", "credit card payment"],
30
- "Education Fund": ["education fund"],
31
- "Savings for Goals": ["down payment", "vacation savings", "wedding savings"],
32
- "Health Savings": ["health savings account", "hsa", "fsa"],
33
  }
34
  }
35
 
@@ -37,38 +41,44 @@ CATEGORIES = {
37
  def normalize_vietnamese(text):
38
  return re.sub(r'[àáạảãâầấậẩẫăằắặẳẵèéẹẻẽêềếệểễìíịỉĩòóọỏõôồốộổỗơờớợởỡùúụủũưừứựửữỳýỵỷỹđ]', '', text).replace("đ", "d")
39
 
40
- # Classify input into categories
41
- def classify_input(user_input):
42
  normalized_input = normalize_vietnamese(user_input.lower())
43
 
44
- # Extract amount if mentioned
45
  amount_match = re.search(r"(\d+(\.\d{1,2})?)", normalized_input)
46
  amount = amount_match.group(0) if amount_match else "Unknown"
47
 
48
- # Match subcategory and main category based on keywords
 
 
 
49
  for main_category, subcategories in CATEGORIES.items():
50
  for subcategory, keywords in subcategories.items():
51
  if any(keyword in normalized_input for keyword in keywords):
52
  return {
53
  "Main Category": main_category,
54
  "Sub Category": subcategory,
55
- "Amount": amount
 
56
  }
57
 
58
- # Default response for unmatched cases
59
  return {
60
  "Main Category": "Uncategorized",
61
  "Sub Category": "Unknown",
62
- "Amount": amount
 
63
  }
64
 
65
- # Define Gradio interface
66
  def process_user_input(user_input):
67
- result = classify_input(user_input)
68
  return (
69
  f"Main Category: {result['Main Category']}\n"
70
  f"Sub Category: {result['Sub Category']}\n"
71
- f"Amount: {result['Amount']}"
 
72
  )
73
 
74
  iface = gr.Interface(
@@ -76,7 +86,7 @@ iface = gr.Interface(
76
  inputs="text",
77
  outputs="text",
78
  title="Expenditure Classifier",
79
- description="Classify expenditures into main and subcategories (Need, Want, Saving/Investment) instantly."
80
  )
81
 
82
  iface.launch()
 
1
  import re
2
+ from transformers import pipeline
3
  import gradio as gr
4
 
5
+ # Load a pre-trained multilingual NER model for entity recognition
6
+ ner_model = pipeline("ner", model="dbmdz/bert-base-multilingual-cased", aggregation_strategy="simple")
7
+
8
+ # Define categories and their associated keywords
9
  CATEGORIES = {
10
  "Need": {
11
+ "Utilities": ["dien", "nuoc", "gas", "internet", "dienthoai"],
12
+ "Housing": ["nha", "thue", "sua chua", "sua nha"],
13
+ "Groceries": ["thuc pham", "sieu thi", "rau cu", "do an"],
14
+ "Transportation": ["xang", "xe", "ve xe", "bao duong"],
15
+ "Education": ["hoc phi", "sach", "truong", "khoa hoc"],
16
+ "Medical": ["bao hiem", "bac si", "thuoc"],
17
+ "Insurance": ["bao hiem", "nha", "oto", "suc khoe"],
18
+ "Childcare": ["tre em", "truong mam non", "nguoi giup viec"],
19
  },
20
  "Want": {
21
+ "Dining Out": ["nha hang", "quan an", "cafe", "tra sua"],
22
+ "Entertainment": ["phim", "karaoke", "game", "nhac"],
23
+ "Travel": ["du lich", "ve may bay", "khach san"],
24
+ "Fitness": ["gym", "yoga", "the thao"],
25
+ "Shopping": ["quan ao", "phu kien", "dien thoai", "luxury"],
26
+ "Hobbies": ["so thich", "do choi", "my thuat"],
27
+ "Personal Care": ["spa", "toc", "lam dep", "my pham"],
28
  },
29
  "Saving/Investment": {
30
+ "Emergency Fund": ["quy du phong"],
31
+ "Retirement": ["nghi huu"],
32
+ "Investments": ["chung khoan", "bat dong san"],
33
+ "Debt Repayment": ["tra no"],
34
+ "Education Fund": ["quy hoc tap"],
35
+ "Savings for Goals": ["quy tiet kiem"],
36
+ "Health Savings": ["bao hiem y te"],
37
  }
38
  }
39
 
 
def normalize_vietnamese(text):
    """Return *text* with Vietnamese diacritics folded to plain base letters.

    Accented letters are mapped to their unaccented base letter
    (e.g. "điện nước" -> "dien nuoc") rather than being deleted, so the
    result can be compared against unaccented keywords.

    Args:
        text: Input string; may be empty and may be in any Unicode
            normalization form.

    Returns:
        The string with all combining marks removed and "đ"/"Đ" replaced
        by "d"/"D". Characters without diacritics are left unchanged.
    """
    # Function-scope import keeps this block self-contained.
    import unicodedata

    # NFD splits each accented letter into base letter + combining marks
    # (tone marks, circumflex, breve, horn), which can then be dropped.
    decomposed = unicodedata.normalize("NFD", text)
    base_only = "".join(
        ch for ch in decomposed if not unicodedata.combining(ch)
    )
    # "đ"/"Đ" are standalone letters with no Unicode decomposition, so
    # they have to be mapped explicitly.
    folded = base_only.replace("đ", "d").replace("Đ", "D")
    # Recompose anything NFD split that was not a stripped diacritic.
    return unicodedata.normalize("NFC", folded)
43
 
# Extract entities and classify
def classify_and_extract(user_input):
    """Classify a free-text expense and extract its amount and entities.

    The input is lower-cased and passed through ``normalize_vietnamese``
    before keyword matching against ``CATEGORIES``. The NER model is run
    on the raw, un-normalized input.

    Args:
        user_input: The expense description typed by the user.

    Returns:
        A dict with the keys "Main Category", "Sub Category", "Amount"
        and "Entities". Category values fall back to "Uncategorized" /
        "Unknown" when no keyword matches; "Amount" is the matched number
        as a string, or "Unknown" when the text contains no digits;
        "Entities" is whatever ``ner_model`` returns for the input.
    """
    normalized_input = normalize_vietnamese(user_input.lower())

    # Take the first number in the text. Digit groups joined by "." or ","
    # are kept whole so that grouped amounts such as "50.000" or
    # "1,500,000" are not cut off after two decimals.
    amount_match = re.search(r"\d+(?:[.,]\d+)*", normalized_input)
    amount = amount_match.group(0) if amount_match else "Unknown"

    # Run the NER model to detect entities
    ner_results = ner_model(user_input)

    # Pick the longest keyword that occurs as a whole word/phrase, so a
    # specific phrase ("nha hang") beats a shorter keyword it contains
    # ("nha"). On equal length the first keyword in CATEGORIES order wins.
    main_match, sub_match = "Uncategorized", "Unknown"
    longest = 0
    for main_category, subcategories in CATEGORIES.items():
        for subcategory, keywords in subcategories.items():
            for keyword in keywords:
                if len(keyword) <= longest:
                    continue
                # [^\W\d_] is "any letter": the keyword must not be glued
                # to other letters ("xe" must not match inside "xem"), but
                # may touch digits or punctuation ("cafe30k").
                pattern = (
                    rf"(?<![^\W\d_]){re.escape(keyword)}(?![^\W\d_])"
                )
                if re.search(pattern, normalized_input):
                    main_match, sub_match = main_category, subcategory
                    longest = len(keyword)

    return {
        "Main Category": main_match,
        "Sub Category": sub_match,
        "Amount": amount,
        "Entities": ner_results,
    }
73
 
# Gradio interface
def process_user_input(user_input):
    """Render the classification of *user_input* as a four-line text report.

    Calls ``classify_and_extract`` and formats its "Main Category",
    "Sub Category", "Amount" and "Entities" values, one per line.

    Args:
        user_input: The expense description typed by the user.

    Returns:
        A newline-separated string with one labelled line per field.
    """
    report = classify_and_extract(user_input)
    main_line = f"Main Category: {report['Main Category']}\n"
    sub_line = f"Sub Category: {report['Sub Category']}\n"
    amount_line = f"Amount: {report['Amount']}\n"
    entities_line = f"Entities: {report['Entities']}"
    return main_line + sub_line + amount_line + entities_line
83
 
84
  iface = gr.Interface(
 
86
  inputs="text",
87
  outputs="text",
88
  title="Expenditure Classifier",
89
+ description="Classify expenditures into main and subcategories (Need, Want, Saving/Investment) and extract amounts."
90
  )
91
 
92
  iface.launch()