Meomap committed on
Commit
19f103a
·
verified ·
1 Parent(s): 0f90513

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -41
app.py CHANGED
@@ -1,67 +1,82 @@
1
  import re
2
- from transformers import pipeline
3
  import gradio as gr
4
 
5
# Load a lightweight model for classification.
# return_all_scores=True asks the pipeline for a score per label rather than
# only the top label.
# NOTE(review): the checkpoint name indicates an SST-2 sentiment model, so its
# labels are presumably sentiment classes rather than Need/Want/Saving —
# confirm before relying on it as a category fallback.
classifier = pipeline("text-classification", model="distilbert-base-uncased-finetuned-sst-2-english", return_all_scores=True)
7
-
8
# Define main and subcategories.
# Maps each main category name to the list of subcategory names it contains.
CATEGORIES = {
    "Need": [
        "utilities", "housing", "groceries", "transportation", "education", "medical", "insurance", "childcare"
    ],
    "Want": [
        "dining out", "entertainment", "travel", "fitness", "shopping", "hobbies", "personal care"
    ],
    "Saving/Investment": [
        "emergency fund", "retirement", "investments", "debt repayment", "education fund", "savings for goals", "health savings"
    ]
}
20
-
21
# Predefined keywords for fast classification.
# Maps a subcategory name to the lowercase keywords that indicate it.
# NOTE(review): "saving" is not one of the subcategory names listed in
# CATEGORIES, so a lookup keyed by those names never reaches this entry —
# confirm the intended key.
KEYWORDS = {
    "saving": ["gui tiet kiem", "tiet kiem", "lai suat", "savings", "interest"],
    "utilities": ["electricity", "water", "gas", "internet", "phone"],
    "housing": ["rent", "mortgage", "property tax", "maintenance"],
    "groceries": ["food", "beverages", "supermarket"],
    "transportation": ["gas", "car", "vehicle", "public transit"],
    "education": ["tuition", "books", "school", "course"],
    "medical": ["insurance", "doctor", "prescriptions", "medicine"],
    "dining out": ["restaurant", "cafe", "fast food", "delivery"],
    # Add more keywords for all subcategories...
}
33
 
34
  # Normalize Vietnamese input (remove accents)
35
def normalize_vietnamese(text):
    """Return *text* with Vietnamese diacritics removed.

    Accented letters are mapped to their base letters (e.g. "tiết kiệm"
    becomes "tiet kiem") instead of being deleted, so unaccented keywords
    can match accented input.

    Args:
        text: The string to normalize.

    Returns:
        The string with combining accents stripped and "đ"/"Đ" replaced by
        "d"/"D".
    """
    import unicodedata  # local import keeps this block self-contained

    # "đ" has no canonical decomposition, so it must be mapped explicitly.
    text = text.replace("đ", "d").replace("Đ", "D")
    # NFD splits each accented letter into base letter + combining marks.
    decomposed = unicodedata.normalize("NFD", text)
    stripped = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    # Recompose so any remaining decomposed sequences return to their usual form.
    return unicodedata.normalize("NFC", stripped)
37
 
38
- # Classify input into main and subcategories
39
def classify_input(user_input):
    """Classify an expense description into a main and sub category.

    The text is lowercased and accent-normalized, then checked against the
    keyword lists. If no keyword matches, the text-classification model
    supplies the main category label.

    Args:
        user_input: Free-text description of the expenditure.

    Returns:
        A dict with the keys "Main Category" and "Sub Category".
    """
    # Normalize input
    normalized_input = normalize_vietnamese(user_input.lower())

    # Match keywords for faster classification; the first subcategory with
    # any keyword contained in the text wins.
    for main_cat, subcats in CATEGORIES.items():
        for subcat in subcats:
            if any(keyword in normalized_input for keyword in KEYWORDS.get(subcat, [])):
                return {"Main Category": main_cat, "Sub Category": subcat.capitalize()}

    # Fallback to model classification
    result = classifier(normalized_input)
    # With return_all_scores=True the pipeline output for a single string can
    # be nested one level ([[{...}, ...]]); unwrap it so max() iterates over
    # the per-label dicts rather than over a list.
    scores = result[0] if result and isinstance(result[0], list) else result
    category = max(scores, key=lambda x: x["score"])["label"]
    return {"Main Category": category, "Sub Category": "Unknown"}
 
 
53
 
54
  # Define Gradio interface
55
def process_user_input(user_input):
    """Format the classification of *user_input* as two display lines.

    Args:
        user_input: Free-text description of the expenditure.

    Returns:
        A string with the main category on the first line and the
        subcategory on the second.
    """
    outcome = classify_input(user_input)
    lines = [
        f"Main Category: {outcome['Main Category']}",
        f"Sub Category: {outcome['Sub Category']}",
    ]
    return "\n".join(lines)
 
 
 
 
58
 
59
# Build the Gradio UI: one text input is passed to process_user_input and
# the returned string is shown as text output.
iface = gr.Interface(
    fn=process_user_input,
    inputs="text",
    outputs="text",
    title="Expenditure Classifier",
    description="Classify expenditures into main and subcategories (Need, Want, Saving/Investment)."
)

# Launch the interface when this script runs.
iface.launch()
 
1
  import re
 
2
  import gradio as gr
3
 
4
# Define main and subcategories with keywords for fast matching.
# Structure: main category -> subcategory -> list of lowercase keywords or
# phrases that indicate that subcategory.
CATEGORIES = {
    "Need": {
        "Utilities": ["electricity", "water", "gas", "internet", "phone"],
        "Housing": ["rent", "mortgage", "property tax", "maintenance"],
        "Groceries": ["food", "beverages", "supermarket", "groceries"],
        "Transportation": ["gasoline", "car payment", "public transit", "vehicle maintenance"],
        "Education": ["tuition", "books", "school", "course"],
        "Medical": ["health insurance", "doctor", "prescription", "medicine"],
        "Insurance": ["home insurance", "auto insurance", "life insurance"],
        "Childcare": ["daycare", "babysitter", "school fee"],
    },
    "Want": {
        "Dining Out": ["restaurant", "cafe", "fast food", "delivery"],
        "Entertainment": ["movie", "concert", "streaming", "game", "doll", "toy"],
        "Travel": ["vacation", "flight", "hotel", "recreation"],
        "Fitness": ["gym", "yoga", "sports", "fitness"],
        "Shopping": ["clothing", "accessories", "gadget", "luxury"],
        "Hobbies": ["art", "crafts", "sports gear", "book", "collectibles"],
        "Personal Care": ["spa", "beauty", "haircut", "salon"],
    },
    "Saving/Investment": {
        "Emergency Fund": ["emergency fund"],
        "Retirement": ["retirement", "pension"],
        "Investments": ["stocks", "bonds", "real estate", "crypto"],
        "Debt Repayment": ["loan repayment", "credit card payment"],
        "Education Fund": ["education fund"],
        "Savings for Goals": ["down payment", "vacation savings", "wedding savings"],
        "Health Savings": ["health savings account", "hsa", "fsa"],
    }
}
35
 
36
  # Normalize Vietnamese input (remove accents)
37
def normalize_vietnamese(text):
    """Return *text* with Vietnamese diacritics removed.

    Accented letters are mapped to their base letters (e.g. "tiết kiệm"
    becomes "tiet kiem") instead of being deleted, so unaccented keywords
    can match accented input.

    Args:
        text: The string to normalize.

    Returns:
        The string with combining accents stripped and "đ"/"Đ" replaced by
        "d"/"D".
    """
    import unicodedata  # local import keeps this block self-contained

    # "đ" has no canonical decomposition, so it must be mapped explicitly.
    text = text.replace("đ", "d").replace("Đ", "D")
    # NFD splits each accented letter into base letter + combining marks.
    decomposed = unicodedata.normalize("NFD", text)
    stripped = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
    # Recompose so any remaining decomposed sequences return to their usual form.
    return unicodedata.normalize("NFC", stripped)
39
 
40
+ # Classify input into categories
41
def classify_input(user_input):
    """Classify an expense description and extract the amount it mentions.

    The text is lowercased and accent-normalized. Keywords are matched as
    whole words or phrases (an optional plural "s"/"es" is tolerated), so
    "art" no longer matches "party". When several keywords match, the
    longest one wins, so a specific phrase such as "vacation savings" is
    not shadowed by the shorter "vacation".

    Args:
        user_input: Free-text description of the expenditure.

    Returns:
        A dict with the keys "Main Category", "Sub Category" and "Amount".
        Unmatched text yields "Uncategorized" / "Unknown"; "Amount" is the
        first number found in the text as a string, or "Unknown".
    """
    normalized_input = normalize_vietnamese(user_input.lower())

    # Extract amount if mentioned. The first alternative accepts
    # comma-grouped thousands ("1,000.50"); the second is a plain number
    # with at most two decimals.
    amount_match = re.search(
        r"\d{1,3}(?:,\d{3})+(?:\.\d{1,2})?|\d+(?:\.\d{1,2})?",
        normalized_input,
    )
    amount = amount_match.group(0) if amount_match else "Unknown"

    # Match subcategory and main category based on keywords, keeping the
    # longest keyword that matches. Ties keep the earlier entry.
    best_length = 0
    best_main, best_sub = "Uncategorized", "Unknown"
    for main_category, subcategories in CATEGORIES.items():
        for subcategory, keywords in subcategories.items():
            for keyword in keywords:
                if len(keyword) <= best_length:
                    continue  # cannot beat the current best match
                # Lookarounds enforce word boundaries on both sides.
                pattern = r"(?<!\w)" + re.escape(keyword) + r"(?:s|es)?(?!\w)"
                if re.search(pattern, normalized_input):
                    best_length = len(keyword)
                    best_main, best_sub = main_category, subcategory

    return {
        "Main Category": best_main,
        "Sub Category": best_sub,
        "Amount": amount
    }
64
 
65
  # Define Gradio interface
66
def process_user_input(user_input):
    """Format the classification of *user_input* as three display lines.

    Args:
        user_input: Free-text description of the expenditure.

    Returns:
        A string with one "<label>: <value>" line each for the main
        category, the subcategory and the amount, in that order.
    """
    fields = classify_input(user_input)
    labels = ("Main Category", "Sub Category", "Amount")
    return "\n".join(f"{label}: {fields[label]}" for label in labels)
73
 
74
# Build the Gradio UI: one text input is passed to process_user_input and
# the returned string is shown as text output.
iface = gr.Interface(
    fn=process_user_input,
    inputs="text",
    outputs="text",
    title="Expenditure Classifier",
    description="Classify expenditures into main and subcategories (Need, Want, Saving/Investment) instantly."
)

# Launch the interface when this script runs.
iface.launch()