Spaces:

Meomap
/

Spend

Sleeping

App Files Files Community

Meomap committed on Jan 7

Commit

f57325a

verified ·

1 Parent(s): 19f103a

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -34

app.py CHANGED Viewed

@@ -1,35 +1,39 @@
 import re
 import gradio as gr
-# Define main and subcategories with keywords for fast matching
 # Keyword table: main category -> subcategory -> list of lower-case English
 # keywords. classify_input tests these keywords against the normalized,
 # lower-cased user input.
 # NOTE(review): several keywords overlap, so the winning subcategory depends
 # on the matcher's precedence rule and on insertion order here. Examples:
 # "vacation" (Travel) is a prefix of "vacation savings" (Savings for Goals),
 # "sports" (Fitness) is a prefix of "sports gear" (Hobbies), "school" is a
 # prefix of "school fee", and "book" (Hobbies) is contained in "books"
 # (Education).
 CATEGORIES = {
     "Need": {
-        "Utilities": ["electricity", "water", "gas", "internet", "phone"],
-        "Housing": ["rent", "mortgage", "property tax", "maintenance"],
-        "Groceries": ["food", "beverages", "supermarket", "groceries"],
-        "Transportation": ["gasoline", "car payment", "public transit", "vehicle maintenance"],
-        "Education": ["tuition", "books", "school", "course"],
-        "Medical": ["health insurance", "doctor", "prescription", "medicine"],
-        "Insurance": ["home insurance", "auto insurance", "life insurance"],
-        "Childcare": ["daycare", "babysitter", "school fee"],
     },
     "Want": {
-        "Dining Out": ["restaurant", "cafe", "fast food", "delivery"],
-        "Entertainment": ["movie", "concert", "streaming", "game", "doll", "toy"],
-        "Travel": ["vacation", "flight", "hotel", "recreation"],
-        "Fitness": ["gym", "yoga", "sports", "fitness"],
-        "Shopping": ["clothing", "accessories", "gadget", "luxury"],
-        "Hobbies": ["art", "crafts", "sports gear", "book", "collectibles"],
-        "Personal Care": ["spa", "beauty", "haircut", "salon"],
     },
     "Saving/Investment": {
-        "Emergency Fund": ["emergency fund"],
-        "Retirement": ["retirement", "pension"],
-        "Investments": ["stocks", "bonds", "real estate", "crypto"],
-        "Debt Repayment": ["loan repayment", "credit card payment"],
-        "Education Fund": ["education fund"],
-        "Savings for Goals": ["down payment", "vacation savings", "wedding savings"],
-        "Health Savings": ["health savings account", "hsa", "fsa"],
     }
 }
@@ -37,38 +41,44 @@ CATEGORIES = {
 def normalize_vietnamese(text):
     """Return *text* with Vietnamese diacritics folded to plain base letters.

     The previous implementation deleted every accented character outright
     (so "điện" became "in"), which made accented input impossible to match
     against unaccented keywords. Here each accented letter is mapped to its
     base letter instead ("điện" -> "dien").

     Args:
         text: Input string; may be empty.

     Returns:
         The string with all combining marks removed and "đ"/"Đ" replaced by
         "d"/"D". Characters without diacritics are returned unchanged.
     """
     import unicodedata  # local import: keeps this block self-contained

     # "đ"/"Đ" have no canonical decomposition, so NFD would leave them
     # untouched; map them by hand first.
     text = text.replace("đ", "d").replace("Đ", "D")
     # NFD splits each accented letter into base letter + combining marks;
     # dropping the marks leaves the base letter.
     decomposed = unicodedata.normalize("NFD", text)
     return "".join(ch for ch in decomposed if not unicodedata.combining(ch))
-# Classify input into categories
-def classify_input(user_input):
     normalized_input = normalize_vietnamese(user_input.lower())
-    # Extract amount if mentioned
     amount_match = re.search(r"(\d+(\.\d{1,2})?)", normalized_input)
     amount = amount_match.group(0) if amount_match else "Unknown"
-    # Match subcategory and main category based on keywords
     for main_category, subcategories in CATEGORIES.items():
         for subcategory, keywords in subcategories.items():
             if any(keyword in normalized_input for keyword in keywords):
                 return {
                     "Main Category": main_category,
                     "Sub Category": subcategory,
-                    "Amount": amount
                 }
-    # Default response for unmatched cases
     return {
         "Main Category": "Uncategorized",
         "Sub Category": "Unknown",
-        "Amount": amount
     }
-# Define Gradio interface
 def process_user_input(user_input):
     """Format the classification of *user_input* as a three-line text report.

     Delegates to ``classify_input`` and renders its "Main Category",
     "Sub Category" and "Amount" fields, one per line.
     """
     classification = classify_input(user_input)
     main_category = classification["Main Category"]
     sub_category = classification["Sub Category"]
     amount = classification["Amount"]
     return (
         f"Main Category: {main_category}\n"
         f"Sub Category: {sub_category}\n"
         f"Amount: {amount}"
     )
 iface = gr.Interface(
@@ -76,7 +86,7 @@ iface = gr.Interface(
     inputs="text",
     outputs="text",
     title="Expenditure Classifier",
-    description="Classify expenditures into main and subcategories (Need, Want, Saving/Investment) instantly."
 )
 iface.launch()

 import re
+from transformers import pipeline
 import gradio as gr
+# Load a pre-trained multilingual NER model for entity recognition
 # The pipeline is built once, when the module is imported, and is later
 # called with the raw (un-normalized) user input by classify_and_extract.
 # NOTE(review): the model id names a base multilingual BERT checkpoint and
 # does not itself indicate a token-classification fine-tune. Confirm that it
 # ships a trained NER head, otherwise the returned entities carry no meaning.
+ner_model = pipeline("ner", model="dbmdz/bert-base-multilingual-cased", aggregation_strategy="simple")
+# Define categories and their associated keywords
 # Keyword table: main category -> subcategory -> list of keywords.
 # Keywords are Vietnamese written without diacritics (plus a few loanwords)
 # and are tested against the normalized, lower-cased user input by
 # classify_and_extract.
 # NOTE(review): several keywords overlap, so the winning subcategory depends
 # on the matcher's precedence rule and on insertion order here. Examples:
 # "nha" (Housing, Insurance) is a prefix of "nha hang" (Dining Out), and
 # "bao hiem" is listed under both Medical and Insurance and is a prefix of
 # "bao hiem y te" (Health Savings).
 # NOTE(review): "dienthoai" (Utilities) is spelled without a space, unlike
 # "dien thoai" (Shopping); confirm this is intended.
 CATEGORIES = {
     "Need": {
+        "Utilities": ["dien", "nuoc", "gas", "internet", "dienthoai"],
+        "Housing": ["nha", "thue", "sua chua", "sua nha"],
+        "Groceries": ["thuc pham", "sieu thi", "rau cu", "do an"],
+        "Transportation": ["xang", "xe", "ve xe", "bao duong"],
+        "Education": ["hoc phi", "sach", "truong", "khoa hoc"],
+        "Medical": ["bao hiem", "bac si", "thuoc"],
+        "Insurance": ["bao hiem", "nha", "oto", "suc khoe"],
+        "Childcare": ["tre em", "truong mam non", "nguoi giup viec"],
     },
     "Want": {
+        "Dining Out": ["nha hang", "quan an", "cafe", "tra sua"],
+        "Entertainment": ["phim", "karaoke", "game", "nhac"],
+        "Travel": ["du lich", "ve may bay", "khach san"],
+        "Fitness": ["gym", "yoga", "the thao"],
+        "Shopping": ["quan ao", "phu kien", "dien thoai", "luxury"],
+        "Hobbies": ["so thich", "do choi", "my thuat"],
+        "Personal Care": ["spa", "toc", "lam dep", "my pham"],
     },
     "Saving/Investment": {
+        "Emergency Fund": ["quy du phong"],
+        "Retirement": ["nghi huu"],
+        "Investments": ["chung khoan", "bat dong san"],
+        "Debt Repayment": ["tra no"],
+        "Education Fund": ["quy hoc tap"],
+        "Savings for Goals": ["quy tiet kiem"],
+        "Health Savings": ["bao hiem y te"],
     }
 }
 def normalize_vietnamese(text):
     """Return *text* with Vietnamese diacritics folded to plain base letters.

     The previous implementation deleted every accented character outright
     (so "điện" became "in"), which made accented input impossible to match
     against the unaccented keyword table. Here each accented letter is
     mapped to its base letter instead ("điện" -> "dien").

     Args:
         text: Input string; may be empty.

     Returns:
         The string with all combining marks removed and "đ"/"Đ" replaced by
         "d"/"D". Characters without diacritics are returned unchanged.
     """
     import unicodedata  # local import: keeps this block self-contained

     # "đ"/"Đ" have no canonical decomposition, so NFD would leave them
     # untouched; map them by hand first.
     text = text.replace("đ", "d").replace("Đ", "D")
     # NFD splits each accented letter into base letter + combining marks;
     # dropping the marks leaves the base letter.
     decomposed = unicodedata.normalize("NFD", text)
     return "".join(ch for ch in decomposed if not unicodedata.combining(ch))
+# Extract entities and classify
+def classify_and_extract(user_input):
     normalized_input = normalize_vietnamese(user_input.lower())
+    # Extract amount using regex
     amount_match = re.search(r"(\d+(\.\d{1,2})?)", normalized_input)
     amount = amount_match.group(0) if amount_match else "Unknown"
+    # Run the NER model to detect entities
+    ner_results = ner_model(user_input)
+    # Match keywords for categories
     for main_category, subcategories in CATEGORIES.items():
         for subcategory, keywords in subcategories.items():
             if any(keyword in normalized_input for keyword in keywords):
                 return {
                     "Main Category": main_category,
                     "Sub Category": subcategory,
+                    "Amount": amount,
+                    "Entities": ner_results,
                 }
+    # Default response if no match
     return {
         "Main Category": "Uncategorized",
         "Sub Category": "Unknown",
+        "Amount": amount,
+        "Entities": ner_results,
     }
+# Gradio interface
 def process_user_input(user_input):
     """Format the classification of *user_input* as a four-line text report.

     Delegates to ``classify_and_extract`` and renders its "Main Category",
     "Sub Category", "Amount" and "Entities" fields, one per line.
     """
     classification = classify_and_extract(user_input)
     main_category = classification["Main Category"]
     sub_category = classification["Sub Category"]
     amount = classification["Amount"]
     entities = classification["Entities"]
     return (
         f"Main Category: {main_category}\n"
         f"Sub Category: {sub_category}\n"
         f"Amount: {amount}\n"
         f"Entities: {entities}"
     )
 iface = gr.Interface(
     inputs="text",
     outputs="text",
     title="Expenditure Classifier",
+    description="Classify expenditures into main and subcategories (Need, Want, Saving/Investment) and extract amounts."
 )
 iface.launch()