Spaces:

Meomap
/

Spend

Sleeping

App Files Files Community

Meomap committed on Jan 7

Commit

0f90513

verified ·

1 Parent(s): 26cf865

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -48

app.py CHANGED Viewed

@@ -1,66 +1,67 @@
-import gradio as gr
-from transformers import pipeline
 import re
-# Load a Hugging Face zero-shot-classification pipeline (facebook/bart-large-mnli); built once at import time and called from classify_other()
-classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
-# Candidate labels handed to the zero-shot classifier in classify_other()
-categories = ["Saving", "Need", "Want", "Investment"]
-# Helper function to extract information from input
-def extract_info(user_input):
-    """Route *user_input* to the savings-specific or the generic classifier.
-
-    Returns whatever classify_saving() or classify_other() returns.
-    """
-    # Remove Vietnamese accents (khong dau processing)
-    # NOTE(review): the substitution replaces every accented letter with an
-    # empty string rather than its unaccented base letter, so accented input
-    # loses characters. The "đ" -> "d" replace on the following line can never
-    # match because "đ" is already part of the removed character class.
-    normalized_input = re.sub(r'[àáạảãâầấậẩẫăằắặẳẵèéẹẻẽêềếệểễìíịỉĩòóọỏõôồốộổỗơờớợởỡùúụủũưừứựửữỳýỵỷỹđ]', '', user_input)
-    normalized_input = normalized_input.replace("đ", "d")
-    # Predefined rules for classification and information extraction
-    if "gui tiet kiem" in normalized_input:
-        return classify_saving(normalized_input)
-    else:
-        return classify_other(normalized_input)
-# Function to classify saving-related expenditure
-def classify_saving(input_text):
-    """Extract the term (a number followed by "thang") from a savings message.
-
-    Returns a follow-up question string when no such term is found,
-    otherwise the string "Saving: " followed by the formatted details dict.
-    """
-    details = {}
-    # Extract potential fields from input text using regex
-    term_match = re.search(r'(\d+)\s*thang', input_text)
-    # Store the matched number with a " tháng" suffix, or None when absent
-    details['term'] = term_match.group(1) + " tháng" if term_match else None
-    # Ask for missing fields
-    if not details.get('term'):
-        return "Ban gui tiet kiem bao nhieu thang?"
-    return "Saving: {}".format(details)
-# Function to classify other expenditures
-def classify_other(input_text):
-    """Classify a non-savings message with the zero-shot classifier.
-
-    Returns a follow-up question string when the text contains no number,
-    otherwise a dict holding the first label of the classifier result, the
-    first number found (as a string) and the lower-cased input text.
-    """
-    result = classifier(input_text, candidate_labels=categories)
-    classification = result['labels'][0]  # Take the top classification
-    # Ask for amount and other details if not mentioned
-    # NOTE(review): the model runs before this check, so its result is
-    # discarded whenever the follow-up question is returned.
-    amount_match = re.search(r'(\d+\.?\d*)', input_text)
-    if not amount_match:
-        return "Ban chi tieu nay het bao nhieu tien?"
-    # Extract sub-category
-    sub_category = input_text.lower()
-    return {
-        "classification": classification,
-        "amount": amount_match.group(0),
-        "sub_category": sub_category
-    }
-# Define the Gradio interface
 def process_user_input(user_input):
-    """Gradio callback: pass the raw text to extract_info() and return its result."""
-    return extract_info(user_input)
-# Text-in / text-out Gradio UI wrapped around process_user_input
 iface = gr.Interface(
     fn=process_user_input,
     inputs="text",
     outputs="text",
-    title="Expenditure Classification",
-    description="Classify expenditures into Need, Want, Saving, or Investment based on the 50-30-20 rule. Type in Vietnamese (không dấu)!"
 )
 iface.launch()

 import re
+from transformers import pipeline
+import gradio as gr
+# Load a lightweight model for classification
+# NOTE(review): the checkpoint name indicates an SST-2 (sentiment) fine-tune,
+# so its labels are presumably sentiment classes rather than expenditure
+# categories — confirm this is the intended fallback model.
+classifier = pipeline("text-classification", model="distilbert-base-uncased-finetuned-sst-2-english", return_all_scores=True)
+# Define main and subcategories
+# Maps each main category to its subcategory names; classify_input() walks
+# this dict in definition order, so earlier entries win on keyword clashes.
+CATEGORIES = {
+    "Need": [
+        "utilities", "housing", "groceries", "transportation", "education", "medical", "insurance", "childcare"
+    ],
+    "Want": [
+        "dining out", "entertainment", "travel", "fitness", "shopping", "hobbies", "personal care"
+    ],
+    "Saving/Investment": [
+        "emergency fund", "retirement", "investments", "debt repayment", "education fund", "savings for goals", "health savings"
+    ]
+}
+# Predefined keywords for fast classification
+# Every key must be a subcategory name from CATEGORIES: keywords are looked
+# up with KEYWORDS.get(subcat, []), so a key that is not a subcategory is
+# never read.
+KEYWORDS = {
+    # Keyed by a real "Saving/Investment" subcategory; a key such as "saving"
+    # that is absent from CATEGORIES would leave these phrases unreachable.
+    "savings for goals": ["gui tiet kiem", "tiet kiem", "lai suat", "savings", "interest"],
+    "utilities": ["electricity", "water", "gas", "internet", "phone"],
+    "housing": ["rent", "mortgage", "property tax", "maintenance"],
+    "groceries": ["food", "beverages", "supermarket"],
+    # NOTE(review): "gas" also appears under "utilities", which is checked
+    # first, so the "gas" entry here never decides the result.
+    "transportation": ["gas", "car", "vehicle", "public transit"],
+    "education": ["tuition", "books", "school", "course"],
+    # NOTE(review): "insurance" is also a subcategory of its own (without a
+    # keyword list), so insurance spending is reported as "Medical".
+    "medical": ["insurance", "doctor", "prescriptions", "medicine"],
+    "dining out": ["restaurant", "cafe", "fast food", "delivery"],
+    # Add more keywords for all subcategories...
+}
+# Normalize Vietnamese input (remove accents)
+def normalize_vietnamese(text):
+    """Return *text* with Vietnamese diacritics removed.
+
+    Each accented letter is mapped to its unaccented base letter
+    (e.g. "gửi tiết kiệm" -> "gui tiet kiem"); letters are never dropped,
+    which keeps the result comparable with unaccented keyword phrases.
+
+    Args:
+        text: Input string, in either precomposed or decomposed Unicode form.
+
+    Returns:
+        The string without combining marks and with "đ"/"Đ" mapped to "d"/"D".
+    """
+    # Local import keeps this function self-contained.
+    import unicodedata
+    # NFD splits every accented letter into base letter + combining marks
+    # (tone marks, circumflex, breve, horn); dropping the marks leaves the
+    # base letter in place.
+    decomposed = unicodedata.normalize("NFD", text)
+    base_letters = "".join(ch for ch in decomposed if not unicodedata.combining(ch))
+    # "đ"/"Đ" have no Unicode decomposition, so map them explicitly.
+    return base_letters.replace("đ", "d").replace("Đ", "D")
+# Classify input into main and subcategories
+def classify_input(user_input):
+    """Classify an expenditure description into a main and a sub category.
+
+    Keyword matching against KEYWORDS is tried first, walking CATEGORIES in
+    definition order; the first subcategory with a keyword contained in the
+    normalized text wins. When nothing matches, the highest-scoring label
+    from the module-level ``classifier`` is reported with the sub category
+    "Unknown".
+
+    Args:
+        user_input: Free-form text describing the expenditure.
+
+    Returns:
+        A dict with the keys "Main Category" and "Sub Category".
+    """
+    # Normalize input
+    normalized_input = normalize_vietnamese(user_input.lower())
+    # Nothing to classify: skip the model call for empty or blank text.
+    if not normalized_input.strip():
+        return {"Main Category": "Unknown", "Sub Category": "Unknown"}
+    # Match keywords for faster classification
+    for main_cat, subcats in CATEGORIES.items():
+        for subcat in subcats:
+            if any(keyword in normalized_input for keyword in KEYWORDS.get(subcat, [])):
+                return {"Main Category": main_cat, "Sub Category": subcat.capitalize()}
+    # Fallback to model classification
+    result = classifier(normalized_input)
+    # With return_all_scores=True the pipeline wraps the per-label scores of a
+    # single input in an outer list ([[{...}, {...}]]). Unwrap it so max()
+    # compares score dicts instead of indexing a list with "score".
+    scores = result[0] if result and isinstance(result[0], list) else result
+    category = max(scores, key=lambda x: x["score"])["label"]
+    return {"Main Category": category, "Sub Category": "Unknown"}
+# Define Gradio interface
 def process_user_input(user_input):
+    """Gradio callback: classify *user_input* and render the result as two text lines."""
+    result = classify_input(user_input)
+    main_category = result['Main Category']
+    sub_category = result['Sub Category']
+    return f"Main Category: {main_category}\nSub Category: {sub_category}"
+# Text-in / text-out Gradio UI wrapped around process_user_input
 iface = gr.Interface(
     fn=process_user_input,
     inputs="text",
     outputs="text",
+    title="Expenditure Classifier",
+    description="Classify expenditures into main and subcategories (Need, Want, Saving/Investment)."
 )
+# Called at module level (no __main__ guard), so importing this module starts the app
 iface.launch()