shubh7 committed
Commit 5f946b0
1 Parent(s): f3bfea5

Adding application file
app.py ADDED
@@ -0,0 +1,165 @@
+ import gradio as gr
+ import os
+ import sys
+ import pandas as pd
+ import sqlite3
+ from pathlib import Path
+ import matplotlib.pyplot as plt
+ import re
+
+ # For Hugging Face Spaces, set the project root to the current directory
+ PROJECT_ROOT = Path(__file__).parent.resolve()
+ sys.path.append(str(PROJECT_ROOT))
+
+ # Import model loading and utility functions
+ from code.train_sqlgen_t5_local import load_model as load_sql_model, generate_sql, get_schema_from_csv
+ from code.train_intent_classifier_local import load_model as load_intent_model, classify_intent
+
+ # Load models
+ sql_model, sql_tokenizer, device = load_sql_model()
+ intent_model, intent_tokenizer, device, label_mapping = load_intent_model()
+
+ # Path to the built-in data file in the data folder
+ DATA_FILE = str(PROJECT_ROOT / "data" / "testing_sql_data.csv")
+
+ # Verify the data file exists
+ if not os.path.exists(DATA_FILE):
+     raise FileNotFoundError(f"Data file not found at {DATA_FILE}. Please ensure testing_sql_data.csv exists in the data folder.")
+
+ def process_query(question, chart_type="auto"):
+     try:
+         # Generate schema from CSV
+         schema = get_schema_from_csv(DATA_FILE)
+         # Generate SQL
+         sql_query = generate_sql(question, schema, sql_model, sql_tokenizer, device)
+         # --- Fix: rewrite table and column names to match the in-memory table ---
+         sql_query = re.sub(r'(FROM|JOIN)\s+\w+', r'\1 data', sql_query, flags=re.IGNORECASE)
+         sql_query = re.sub(r'(FROM|JOIN)\s+"[^"]+"', r'\1 data', sql_query, flags=re.IGNORECASE)
+         sql_query = re.sub(r"(FROM|JOIN)\s+'[^']+'", r'\1 data', sql_query, flags=re.IGNORECASE)
+         sql_query = sql_query.replace('product_price', 'total_price')
+         sql_query = sql_query.replace('store_name', 'store_id')
+         sql_query = sql_query.replace('sales_method', 'date')
+         sql_query = re.sub(r'\bsales\b', 'total_price', sql_query)
+         # --- End fix ---
+         # Classify intent
+         intent = classify_intent(question, intent_model, intent_tokenizer, device, label_mapping)
+         # Execute SQL on the CSV data
+         df = pd.read_csv(DATA_FILE)
+         conn = sqlite3.connect(":memory:")
+         df.to_sql("data", conn, index=False, if_exists="replace")
+         result_df = pd.read_sql_query(sql_query, conn)
+         conn.close()
+         # Defensive check for result_df columns
+         if result_df.empty or len(result_df.columns) < 2:
+             chart_path = None
+             insights = "No results or not enough columns to display chart/insights."
+             return result_df, intent, sql_query, chart_path, insights
+         # Generate chart
+         chart_path = os.path.join(PROJECT_ROOT, "chart.png")
+         plt.figure(figsize=(10, 6))
+         if chart_type == "auto":
+             # Trends get a line chart; everything else defaults to bars
+             chart_type = "line" if intent == "trend" else "bar"
+         if chart_type == "bar":
+             result_df.plot(kind="bar", x=result_df.columns[0], y=result_df.columns[1])
+         elif chart_type == "line":
+             result_df.plot(kind="line", x=result_df.columns[0], y=result_df.columns[1], marker='o')
+         elif chart_type == "pie":
+             result_df.plot(kind="pie", y=result_df.columns[1], labels=result_df[result_df.columns[0]])
+         plt.title(question)
+         plt.tight_layout()
+         plt.savefig(chart_path)
+         plt.close()
+         # Generate insights
+         insights = generate_insights(result_df, intent, question)
+         return result_df, intent, sql_query, chart_path, insights
+     except Exception as e:
+         return None, "Error", str(e), None, f"Error: {str(e)}"
+
+ def generate_insights(result_df, intent, question):
+     if result_df is None or result_df.empty or len(result_df.columns) < 2:
+         return "No data available for insights."
+     insights = []
+     if intent == "summary":
+         try:
+             total = result_df[result_df.columns[1]].sum()
+             insights.append(f"Total {result_df.columns[1]}: {total:,.2f}")
+         except Exception:
+             pass
+     elif intent == "comparison":
+         if len(result_df) >= 2:
+             try:
+                 # Assumes the query returned results sorted in descending order
+                 highest = result_df.iloc[0]
+                 lowest = result_df.iloc[-1]
+                 diff = (highest.iloc[1] / lowest.iloc[1] - 1) * 100
+                 insights.append(f"{highest.iloc[0]} is {diff:.1f}% higher than {lowest.iloc[0]}")
+             except Exception:
+                 pass
+     elif intent == "trend":
+         if len(result_df) >= 2:
+             try:
+                 first = result_df.iloc[0][result_df.columns[1]]
+                 last = result_df.iloc[-1][result_df.columns[1]]
+                 change = (last / first - 1) * 100
+                 insights.append(f"Overall change: {change:+.1f}%")
+             except Exception:
+                 pass
+     insights.append(f"Analysis covers {len(result_df)} records")
+     if "category" in result_df.columns:
+         insights.append(f"Number of categories: {result_df['category'].nunique()}")
+     return "\n".join(f"• {insight}" for insight in insights)
+
+ # Clickable FAQs (6 only)
+ faqs = [
+     "What are the top 5 products by quantity sold?",
+     "What is the total sales amount for each category?",
+     "Which store had the highest total sales?",
+     "What are the most popular payment methods?",
+     "What is the sales trend over time?",
+     "What is the average transaction value?"
+ ]
+
+ def fill_question(faq):
+     return gr.update(value=faq)
+
+ with gr.Blocks(title="RetailGenie - Natural Language to SQL") as demo:
+     gr.Markdown("""
+     # RetailGenie - Natural Language to SQL
+     Ask questions in natural language to generate SQL queries and visualizations, using a retail dataset with product sales information.
+     """)
+     with gr.Row():
+         with gr.Column(scale=1):
+             question = gr.Textbox(
+                 label="Enter your question",
+                 placeholder="What is the total sales amount for each product category?"
+             )
+             faq_radio = gr.Radio(faqs, label="FAQs (click to autofill)", interactive=True)
+             faq_radio.change(fn=fill_question, inputs=faq_radio, outputs=question)
+             chart_type = gr.Radio(
+                 ["auto", "bar", "line", "pie"],
+                 label="Chart Type",
+                 value="auto"
+             )
+             submit_btn = gr.Button("Generate", variant="primary")
+         with gr.Column(scale=2):
+             with gr.Accordion("SQL and Intent Details", open=False):
+                 intent_output = gr.Textbox(label="Predicted Intent")
+                 sql_output = gr.Textbox(label="Generated SQL", lines=3)
+             results_df = gr.DataFrame(label="Query Results")
+             chart_output = gr.Image(label="Chart")
+             insights_output = gr.Textbox(label="Insights", lines=5)
+     submit_btn.click(
+         fn=process_query,
+         inputs=[question, chart_type],
+         outputs=[results_df, intent_output, sql_output, chart_output, insights_output]
+     )
+
+ if __name__ == "__main__":
+     demo.launch()
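
A quick sanity check of the name-rewriting step in process_query, as a minimal sketch; the sample query is a hypothetical model output, not something the model is guaranteed to produce:

    import re

    sql = "SELECT category, SUM(sales) FROM transactions GROUP BY category"  # hypothetical model output
    # Point FROM/JOIN at the in-memory "data" table, then map legacy column names
    sql = re.sub(r'(FROM|JOIN)\s+\w+', r'\1 data', sql, flags=re.IGNORECASE)
    sql = re.sub(r'\bsales\b', 'total_price', sql)
    print(sql)  # SELECT category, SUM(total_price) FROM data GROUP BY category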
code/__init__.py ADDED
@@ -0,0 +1 @@
+ # This file makes the code directory a Python package
code/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (124 Bytes).
code/__pycache__/train_intent_classifier_local.cpython-310.pyc ADDED
Binary file (2.51 kB).
code/__pycache__/train_sqlgen_t5_local.cpython-310.pyc ADDED
Binary file (2.41 kB).
code/cloud_train_intent_classifier_script.py ADDED
@@ -0,0 +1,77 @@
+ import pandas as pd
+ import os
+ import argparse
+ import shutil
+ import tempfile
+ import json
+ from google.cloud import storage
+ from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification, Trainer, TrainingArguments
+ from datasets import Dataset
+ from sklearn.preprocessing import LabelEncoder
+ import torch
+
+ # CLI arguments
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--dataset_path", type=str, required=True)
+ parser.add_argument("--output_dir", type=str, required=True)
+ args = parser.parse_args()
+
+ # Load dataset
+ print("📦 Loading dataset from:", args.dataset_path)
+ df = pd.read_csv(args.dataset_path)
+ df = df[["question", "intent"]]
+
+ # Label encoding (cast numpy ints so json.dump can serialize the mapping)
+ le = LabelEncoder()
+ df["label"] = le.fit_transform(df["intent"])
+ label_mapping = {label: int(idx) for label, idx in zip(le.classes_, le.transform(le.classes_))}
+ dataset = Dataset.from_pandas(df)
+
+ # Tokenizer and model
+ model_name = "distilbert-base-uncased"
+ tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
+ model = DistilBertForSequenceClassification.from_pretrained(model_name, num_labels=len(label_mapping))
+
+ def tokenize(example):
+     return tokenizer(example["question"], truncation=True, padding="max_length", max_length=128)
+
+ dataset = dataset.map(tokenize)
+
+ training_args = TrainingArguments(
+     output_dir="./results_intent_classifier",
+     per_device_train_batch_size=4,
+     num_train_epochs=10,
+     logging_dir="./logs_intent",
+     logging_steps=5,
+     save_strategy="epoch",
+     evaluation_strategy="no"
+ )
+
+ trainer = Trainer(model=model, args=training_args, train_dataset=dataset)
+ trainer.train()
+
+ # Save to a temporary local directory
+ local_dir = tempfile.mkdtemp()
+ model.save_pretrained(local_dir)
+ tokenizer.save_pretrained(local_dir)
+
+ with open(os.path.join(local_dir, "label_mapping.json"), "w") as f:
+     json.dump(label_mapping, f)
+
+ # Upload to GCS
+ gcs_model_path = os.path.join(args.output_dir, "intent")
+ bucket_name = gcs_model_path.split("/")[2]
+ base_path = "/".join(gcs_model_path.split("/")[3:])
+
+ client = storage.Client()
+
+ for fname in os.listdir(local_dir):
+     local_path = os.path.join(local_dir, fname)
+     gcs_blob_path = os.path.join(base_path, fname)
+
+     print(f"⬆️ Uploading {fname} to gs://{bucket_name}/{gcs_blob_path}")
+     bucket = client.bucket(bucket_name)
+     blob = bucket.blob(gcs_blob_path)
+     blob.upload_from_filename(local_path)
+
+ print(f"✅ Intent model successfully uploaded to gs://{bucket_name}/{base_path}")
code/cloud_train_sqlgen_t5_script.py ADDED
@@ -0,0 +1,72 @@
+ import pandas as pd
+ import os
+ import argparse
+ import shutil
+ import tempfile
+ from google.cloud import storage
+ from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments
+ from datasets import Dataset
+ import torch
+
+ # CLI arguments
+ parser = argparse.ArgumentParser()
+ parser.add_argument("--dataset_path", type=str, required=True)
+ parser.add_argument("--output_dir", type=str, required=True)
+ args = parser.parse_args()
+
+ print("📦 Loading dataset from:", args.dataset_path)
+ df = pd.read_csv(args.dataset_path)
+ df = df[["question", "sql"]].rename(columns={"question": "input_text", "sql": "target_text"})
+ df["input_text"] = "translate question to SQL: " + df["input_text"]
+ dataset = Dataset.from_pandas(df)
+
+ # Load tokenizer and model
+ model_name = "t5-small"
+ tokenizer = T5Tokenizer.from_pretrained(model_name)
+ model = T5ForConditionalGeneration.from_pretrained(model_name)
+
+ def preprocess(example):
+     input_enc = tokenizer(example["input_text"], truncation=True, padding="max_length", max_length=128)
+     target_enc = tokenizer(example["target_text"], truncation=True, padding="max_length", max_length=128)
+     input_enc["labels"] = target_enc["input_ids"]
+     return input_enc
+
+ tokenized_dataset = dataset.map(preprocess)
+
+ # Training arguments
+ training_args = TrainingArguments(
+     output_dir="./results_t5_sqlgen",
+     per_device_train_batch_size=4,
+     num_train_epochs=10,
+     logging_dir="./logs",
+     logging_steps=5,
+     save_strategy="epoch",
+     evaluation_strategy="no"
+ )
+
+ # Train model
+ trainer = Trainer(model=model, args=training_args, train_dataset=tokenized_dataset)
+ trainer.train()
+
+ # Save model to a temporary local directory
+ local_dir = tempfile.mkdtemp()
+ model.save_pretrained(local_dir)
+ tokenizer.save_pretrained(local_dir)
+
+ # Upload all files to GCS
+ gcs_model_path = os.path.join(args.output_dir, "sqlgen")
+ bucket_name = gcs_model_path.split("/")[2]
+ base_path = "/".join(gcs_model_path.split("/")[3:])
+
+ client = storage.Client()
+
+ for fname in os.listdir(local_dir):
+     local_path = os.path.join(local_dir, fname)
+     gcs_blob_path = os.path.join(base_path, fname)
+
+     print(f"⬆️ Uploading {fname} to gs://{bucket_name}/{gcs_blob_path}")
+     bucket = client.bucket(bucket_name)
+     blob = bucket.blob(gcs_blob_path)
+     blob.upload_from_filename(local_path)
+
+ print(f"✅ Model successfully uploaded to gs://{bucket_name}/{base_path}")
code/train_intent_classifier_local.py ADDED
@@ -0,0 +1,84 @@
+ import torch
+ from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification
+ import os
+ import json
+
+ # Get project root directory
+ PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+
+ def load_model():
+     print("📦 Loading pre-trained intent classification model...")
+     model_name = "distilbert-base-uncased"
+     tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+     # Define intent labels
+     intent_labels = [
+         "summary", "comparison", "trend", "anomaly", "forecast"
+     ]
+     num_labels = len(intent_labels)
+
+     # Create label mapping
+     label_mapping = {label: idx for idx, label in enumerate(intent_labels)}
+
+     # Load model with our number of labels; note the classification head on top of
+     # the base checkpoint is freshly initialized until fine-tuned weights are loaded
+     model = DistilBertForSequenceClassification.from_pretrained(
+         model_name, num_labels=num_labels
+     )
+     model = model.to(device)
+     model.eval()
+
+     return model, tokenizer, device, label_mapping
+
+ def classify_intent(question, model, tokenizer, device, label_mapping):
+     # Tokenize input
+     inputs = tokenizer(
+         question,
+         return_tensors="pt",
+         truncation=True,
+         padding=True,
+         max_length=128
+     ).to(device)
+
+     # Get prediction
+     with torch.no_grad():
+         outputs = model(**inputs)
+         predicted_class_id = outputs.logits.argmax().item()
+
+     # Convert back to label
+     id2label = {v: k for k, v in label_mapping.items()}
+     intent = id2label[predicted_class_id]
+
+     return intent
+
+ if __name__ == "__main__":
+     # Load the model
+     model, tokenizer, device, label_mapping = load_model()
+
+     # Save the model and label mapping
+     output_dir = os.path.join(PROJECT_ROOT, "model_intent_classifier")
+     print(f"💾 Saving model to {output_dir}")
+     os.makedirs(output_dir, exist_ok=True)
+     model.save_pretrained(output_dir)
+     tokenizer.save_pretrained(output_dir)
+
+     # Save label mapping
+     with open(os.path.join(output_dir, "label_mapping.json"), "w") as f:
+         json.dump(label_mapping, f)
+
+     print(f"✅ Model successfully saved to {output_dir}")
+
+     # Example usage
+     test_questions = [
+         "What is the total sales amount for each product category?",
+         "Compare sales between March and April",
+         "Show me the sales trend over the last 6 months",
+         "Which products have unusual sales patterns?",
+         "What will be the sales forecast for next month?"
+     ]
+
+     print("\nTesting intent classification:")
+     for question in test_questions:
+         intent = classify_intent(question, model, tokenizer, device, label_mapping)
+         print(f"Question: {question}")
+         print(f"Predicted intent: {intent}\n")
code/train_sqlgen_t5_local.py ADDED
@@ -0,0 +1,76 @@
+ import torch
+ from transformers import T5Tokenizer, T5ForConditionalGeneration
+ import os
+ import pandas as pd
+
+ # Get project root directory
+ PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
+
+ def load_model():
+     print("📦 Loading pre-trained text-to-SQL model...")
+     model_name = "cssupport/t5-small-awesome-text-to-sql"
+     tokenizer = T5Tokenizer.from_pretrained(model_name)
+     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+     model = T5ForConditionalGeneration.from_pretrained(model_name)
+     model = model.to(device)
+     model.eval()
+     return model, tokenizer, device
+
+ def generate_sql(question, schema, model, tokenizer, device):
+     # Format input as expected by the model
+     input_prompt = f"tables:\n{schema}\nquery for: {question}"
+
+     # Tokenize the input prompt
+     inputs = tokenizer(input_prompt, padding=True, truncation=True, return_tensors="pt").to(device)
+
+     # Generate SQL
+     with torch.no_grad():
+         outputs = model.generate(**inputs, max_length=512)
+
+     # Decode the output
+     generated_sql = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     return generated_sql
+
+ def get_schema_from_csv(csv_path):
+     """Generate a CREATE TABLE statement from a CSV file"""
+     df = pd.read_csv(csv_path)
+     columns = []
+     for col in df.columns:
+         # Infer column type
+         dtype = df[col].dtype
+         if dtype == 'int64':
+             col_type = 'INT'
+         elif dtype == 'float64':
+             col_type = 'DECIMAL(10,2)'
+         else:
+             col_type = 'VARCHAR(255)'
+         columns.append(f"{col} {col_type}")
+
+     table_name = os.path.splitext(os.path.basename(csv_path))[0]
+     create_table = f"CREATE TABLE {table_name} (\n " + ",\n ".join(columns) + "\n);"
+     return create_table
+
+ if __name__ == "__main__":
+     # Load the pre-trained model
+     model, tokenizer, device = load_model()
+
+     # Save the model locally for future use
+     output_dir = os.path.join(PROJECT_ROOT, "model_sqlgen_t5")
+     print(f"💾 Saving model to {output_dir}")
+     os.makedirs(output_dir, exist_ok=True)
+     model.save_pretrained(output_dir)
+     tokenizer.save_pretrained(output_dir)
+     print(f"✅ Model successfully saved to {output_dir}")
+
+     # Example usage with CSV
+     csv_path = os.path.join(PROJECT_ROOT, "data", "retail_dataset.csv")
+     if os.path.exists(csv_path):
+         schema = get_schema_from_csv(csv_path)
+         print("\nGenerated schema from CSV:")
+         print(schema)
+
+         question = "What is the total sales amount for each product category?"
+         sql_query = generate_sql(question, schema, model, tokenizer, device)
+         print("\nExample usage:")
+         print(f"Question: {question}")
+         print(f"Generated SQL: {sql_query}")
data/retail_dataset.csv ADDED
@@ -0,0 +1,11 @@
+ question,intent,sql
+ What are the top 5 selling products in March?,summary,"SELECT product_name, SUM(quantity) AS total_sold FROM transactions WHERE MONTH(date) = 3 GROUP BY product_name ORDER BY total_sold DESC LIMIT 5;"
+ Which store had the highest revenue in April?,summary,"SELECT store_id, SUM(total_price) AS revenue FROM transactions WHERE MONTH(date) = 4 GROUP BY store_id ORDER BY revenue DESC LIMIT 1;"
+ Compare returns between electronics and clothing in Q1.,comparison,"SELECT category, COUNT(*) AS return_count FROM returns WHERE category IN ('electronics', 'clothing') AND QUARTER(date) = 1 GROUP BY category;"
+ Which products saw a sales drop compared to last month?,anomaly,"SELECT t1.product_id, t1.month, t1.sales, t2.sales AS last_month_sales FROM monthly_sales t1 JOIN monthly_sales t2 ON t1.product_id = t2.product_id AND t1.month = t2.month + 1 WHERE t1.sales < t2.sales;"
+ Show the sales trend of iPhone 14 in the last 6 months.,trend,"SELECT MONTH(date) AS month, SUM(quantity) AS total_sales FROM transactions WHERE product_name = 'iPhone 14' AND date >= DATE_SUB(CURDATE(), INTERVAL 6 MONTH) GROUP BY month ORDER BY month;"
+ Which category has the highest number of returns?,summary,"SELECT category, COUNT(*) AS total_returns FROM returns GROUP BY category ORDER BY total_returns DESC LIMIT 1;"
+ What is the total sales for each product category?,summary,"SELECT category, SUM(total_price) AS total_sales FROM transactions GROUP BY category;"
+ List the most returned products in the last month.,summary,"SELECT product_name, COUNT(*) AS return_count FROM returns WHERE date >= DATE_SUB(CURDATE(), INTERVAL 1 MONTH) GROUP BY product_name ORDER BY return_count DESC;"
+ Which store had the lowest performance in Q2?,summary,"SELECT store_id, SUM(total_price) AS total_sales FROM transactions WHERE QUARTER(date) = 2 GROUP BY store_id ORDER BY total_sales ASC LIMIT 1;"
+ What are the top 3 most popular products in electronics?,summary,"SELECT product_name, SUM(quantity) AS total_sold FROM transactions WHERE category = 'electronics' GROUP BY product_name ORDER BY total_sold DESC LIMIT 3;"
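
This file has the question/intent/sql columns the two cloud training scripts expect; the T5 script, for example, turns each row into a prefixed seq2seq pair. A minimal sketch of that transformation:

    import pandas as pd

    df = pd.read_csv("data/retail_dataset.csv")
    row = df.iloc[0]
    print("translate question to SQL: " + row["question"])  # model input
    print(row["sql"])                                        # training target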
data/retail_schema.sql ADDED
@@ -0,0 +1,30 @@
+
+ -- Table: transactions
+ CREATE TABLE transactions (
+     transaction_id INT PRIMARY KEY,
+     product_id INT,
+     product_name VARCHAR(100),
+     category VARCHAR(50),
+     quantity INT,
+     total_price DECIMAL(10, 2),
+     store_id INT,
+     date DATE
+ );
+
+ -- Table: returns
+ CREATE TABLE returns (
+     return_id INT PRIMARY KEY,
+     product_id INT,
+     product_name VARCHAR(100),
+     category VARCHAR(50),
+     store_id INT,
+     date DATE
+ );
+
+ -- Table: monthly_sales
+ CREATE TABLE monthly_sales (
+     product_id INT,
+     product_name VARCHAR(100),
+     month INT,
+     sales INT
+ );
data/testing_sql_data.csv ADDED
@@ -0,0 +1,21 @@
+ product_id,product_name,category,quantity,total_price,store_id,payment_method,date
+ 1,Sneakers,Footwear,5,500,1,Credit Card,2023-03-01
+ 2,T-Shirt,Apparel,3,90,1,Cash,2023-03-02
+ 3,Laptop,Electronics,2,2000,2,Credit Card,2023-04-01
+ 4,Running Shoes,Footwear,7,700,2,Debit Card,2023-03-15
+ 5,Polo Shirt,Apparel,2,60,1,Cash,2023-03-20
+ 6,Jeans,Apparel,3,240,3,Credit Card,2023-03-05
+ 7,Smartwatch,Electronics,1,350,2,Mobile Payment,2023-03-10
+ 8,Hoodie,Apparel,2,100,1,Credit Card,2023-03-22
+ 9,Tablet,Electronics,1,800,3,Debit Card,2023-04-05
+ 10,Backpack,Accessories,4,150,1,Cash,2023-04-10
+ 11,Sports Shoes,Footwear,6,600,2,Credit Card,2023-03-12
+ 12,Headphones,Electronics,3,450,1,Mobile Payment,2023-03-18
+ 13,Baseball Cap,Apparel,5,75,3,Cash,2023-03-25
+ 14,Gaming Monitor,Electronics,2,400,2,Credit Card,2023-04-12
+ 15,Leather Wallet,Accessories,3,90,1,Debit Card,2023-03-28
+ 16,Winter Jacket,Apparel,2,180,2,Credit Card,2023-03-30
+ 17,Wireless Mouse,Electronics,4,120,3,Cash,2023-04-02
+ 18,Leather Belt,Accessories,2,60,1,Credit Card,2023-03-08
+ 19,Mechanical Keyboard,Electronics,1,100,2,Mobile Payment,2023-04-08
+ 20,Athletic Socks,Apparel,6,60,3,Cash,2023-03-14
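
A minimal sketch of the execution path app.py uses against this file: load the CSV into an in-memory SQLite table named data and run a query over it.

    import pandas as pd, sqlite3

    df = pd.read_csv("data/testing_sql_data.csv")
    conn = sqlite3.connect(":memory:")
    df.to_sql("data", conn, index=False, if_exists="replace")
    print(pd.read_sql_query(
        "SELECT category, SUM(total_price) AS total_sales FROM data GROUP BY category", conn))
    conn.close()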
model_intent_classifier/config.json ADDED
@@ -0,0 +1,38 @@
+ {
+   "_name_or_path": "distilbert-base-uncased",
+   "activation": "gelu",
+   "architectures": [
+     "DistilBertForSequenceClassification"
+   ],
+   "attention_dropout": 0.1,
+   "dim": 768,
+   "dropout": 0.1,
+   "hidden_dim": 3072,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2",
+     "3": "LABEL_3",
+     "4": "LABEL_4"
+   },
+   "initializer_range": 0.02,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_2": 2,
+     "LABEL_3": 3,
+     "LABEL_4": 4
+   },
+   "max_position_embeddings": 512,
+   "model_type": "distilbert",
+   "n_heads": 12,
+   "n_layers": 6,
+   "pad_token_id": 0,
+   "qa_dropout": 0.1,
+   "seq_classif_dropout": 0.2,
+   "sinusoidal_pos_embds": false,
+   "tie_weights_": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.20.0",
+   "vocab_size": 30522
+ }
model_intent_classifier/label_mapping.json ADDED
@@ -0,0 +1 @@
+ {"summary": 0, "comparison": 1, "trend": 2, "anomaly": 3, "forecast": 4}
model_intent_classifier/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2652d23a878b3575ea53ee5f629b5578acd7bc8f082dcb804050f38ae34ffadb
+ size 267864195
model_intent_classifier/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "cls_token": "[CLS]",
+   "mask_token": "[MASK]",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "unk_token": "[UNK]"
+ }
model_intent_classifier/tokenizer.json ADDED
The diff for this file is too large to render.
model_intent_classifier/tokenizer_config.json ADDED
@@ -0,0 +1,14 @@
+ {
+   "cls_token": "[CLS]",
+   "do_lower_case": true,
+   "mask_token": "[MASK]",
+   "model_max_length": 512,
+   "name_or_path": "distilbert-base-uncased",
+   "pad_token": "[PAD]",
+   "sep_token": "[SEP]",
+   "special_tokens_map_file": null,
+   "strip_accents": null,
+   "tokenize_chinese_chars": true,
+   "tokenizer_class": "DistilBertTokenizer",
+   "unk_token": "[UNK]"
+ }
model_intent_classifier/vocab.txt ADDED
The diff for this file is too large to render.
model_sqlgen_t5/config.json ADDED
@@ -0,0 +1,61 @@
+ {
+   "_name_or_path": "cssupport/t5-small-awesome-text-to-sql",
+   "architectures": [
+     "T5ForConditionalGeneration"
+   ],
+   "d_ff": 2048,
+   "d_kv": 64,
+   "d_model": 512,
+   "decoder_start_token_id": 0,
+   "dense_act_fn": "relu",
+   "dropout_rate": 0.1,
+   "eos_token_id": 1,
+   "feed_forward_proj": "relu",
+   "initializer_factor": 1.0,
+   "is_encoder_decoder": true,
+   "is_gated_act": false,
+   "layer_norm_epsilon": 1e-06,
+   "model_type": "t5",
+   "n_positions": 512,
+   "num_decoder_layers": 6,
+   "num_heads": 8,
+   "num_layers": 6,
+   "output_past": true,
+   "pad_token_id": 0,
+   "relative_attention_max_distance": 128,
+   "relative_attention_num_buckets": 32,
+   "task_specific_params": {
+     "summarization": {
+       "early_stopping": true,
+       "length_penalty": 2.0,
+       "max_length": 200,
+       "min_length": 30,
+       "no_repeat_ngram_size": 3,
+       "num_beams": 4,
+       "prefix": "summarize: "
+     },
+     "translation_en_to_de": {
+       "early_stopping": true,
+       "max_length": 300,
+       "num_beams": 4,
+       "prefix": "translate English to German: "
+     },
+     "translation_en_to_fr": {
+       "early_stopping": true,
+       "max_length": 300,
+       "num_beams": 4,
+       "prefix": "translate English to French: "
+     },
+     "translation_en_to_ro": {
+       "early_stopping": true,
+       "max_length": 300,
+       "num_beams": 4,
+       "prefix": "translate English to Romanian: "
+     }
+   },
+   "tf_legacy_loss": false,
+   "torch_dtype": "float32",
+   "transformers_version": "4.20.0",
+   "use_cache": true,
+   "vocab_size": 32128
+ }
model_sqlgen_t5/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f92514c0d85ce64c114ac84d139b3d3c545ef077b66cc98cf5fdd8ae75d9ad8f
+ size 242070639
model_sqlgen_t5/special_tokens_map.json ADDED
@@ -0,0 +1,107 @@
+ {
+   "additional_special_tokens": [
+     "<extra_id_0>",
+     "<extra_id_1>",
+     "<extra_id_2>",
+     "<extra_id_3>",
+     "<extra_id_4>",
+     "<extra_id_5>",
+     "<extra_id_6>",
+     "<extra_id_7>",
+     "<extra_id_8>",
+     "<extra_id_9>",
+     "<extra_id_10>",
+     "<extra_id_11>",
+     "<extra_id_12>",
+     "<extra_id_13>",
+     "<extra_id_14>",
+     "<extra_id_15>",
+     "<extra_id_16>",
+     "<extra_id_17>",
+     "<extra_id_18>",
+     "<extra_id_19>",
+     "<extra_id_20>",
+     "<extra_id_21>",
+     "<extra_id_22>",
+     "<extra_id_23>",
+     "<extra_id_24>",
+     "<extra_id_25>",
+     "<extra_id_26>",
+     "<extra_id_27>",
+     "<extra_id_28>",
+     "<extra_id_29>",
+     "<extra_id_30>",
+     "<extra_id_31>",
+     "<extra_id_32>",
+     "<extra_id_33>",
+     "<extra_id_34>",
+     "<extra_id_35>",
+     "<extra_id_36>",
+     "<extra_id_37>",
+     "<extra_id_38>",
+     "<extra_id_39>",
+     "<extra_id_40>",
+     "<extra_id_41>",
+     "<extra_id_42>",
+     "<extra_id_43>",
+     "<extra_id_44>",
+     "<extra_id_45>",
+     "<extra_id_46>",
+     "<extra_id_47>",
+     "<extra_id_48>",
+     "<extra_id_49>",
+     "<extra_id_50>",
+     "<extra_id_51>",
+     "<extra_id_52>",
+     "<extra_id_53>",
+     "<extra_id_54>",
+     "<extra_id_55>",
+     "<extra_id_56>",
+     "<extra_id_57>",
+     "<extra_id_58>",
+     "<extra_id_59>",
+     "<extra_id_60>",
+     "<extra_id_61>",
+     "<extra_id_62>",
+     "<extra_id_63>",
+     "<extra_id_64>",
+     "<extra_id_65>",
+     "<extra_id_66>",
+     "<extra_id_67>",
+     "<extra_id_68>",
+     "<extra_id_69>",
+     "<extra_id_70>",
+     "<extra_id_71>",
+     "<extra_id_72>",
+     "<extra_id_73>",
+     "<extra_id_74>",
+     "<extra_id_75>",
+     "<extra_id_76>",
+     "<extra_id_77>",
+     "<extra_id_78>",
+     "<extra_id_79>",
+     "<extra_id_80>",
+     "<extra_id_81>",
+     "<extra_id_82>",
+     "<extra_id_83>",
+     "<extra_id_84>",
+     "<extra_id_85>",
+     "<extra_id_86>",
+     "<extra_id_87>",
+     "<extra_id_88>",
+     "<extra_id_89>",
+     "<extra_id_90>",
+     "<extra_id_91>",
+     "<extra_id_92>",
+     "<extra_id_93>",
+     "<extra_id_94>",
+     "<extra_id_95>",
+     "<extra_id_96>",
+     "<extra_id_97>",
+     "<extra_id_98>",
+     "<extra_id_99>"
+   ],
+   "eos_token": "</s>",
+   "pad_token": "<pad>",
+   "unk_token": "<unk>"
+ }
model_sqlgen_t5/spiece.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
+ size 791656
model_sqlgen_t5/tokenizer_config.json ADDED
@@ -0,0 +1,114 @@
+ {
+   "additional_special_tokens": [
+     "<extra_id_0>",
+     "<extra_id_1>",
+     "<extra_id_2>",
+     "<extra_id_3>",
+     "<extra_id_4>",
+     "<extra_id_5>",
+     "<extra_id_6>",
+     "<extra_id_7>",
+     "<extra_id_8>",
+     "<extra_id_9>",
+     "<extra_id_10>",
+     "<extra_id_11>",
+     "<extra_id_12>",
+     "<extra_id_13>",
+     "<extra_id_14>",
+     "<extra_id_15>",
+     "<extra_id_16>",
+     "<extra_id_17>",
+     "<extra_id_18>",
+     "<extra_id_19>",
+     "<extra_id_20>",
+     "<extra_id_21>",
+     "<extra_id_22>",
+     "<extra_id_23>",
+     "<extra_id_24>",
+     "<extra_id_25>",
+     "<extra_id_26>",
+     "<extra_id_27>",
+     "<extra_id_28>",
+     "<extra_id_29>",
+     "<extra_id_30>",
+     "<extra_id_31>",
+     "<extra_id_32>",
+     "<extra_id_33>",
+     "<extra_id_34>",
+     "<extra_id_35>",
+     "<extra_id_36>",
+     "<extra_id_37>",
+     "<extra_id_38>",
+     "<extra_id_39>",
+     "<extra_id_40>",
+     "<extra_id_41>",
+     "<extra_id_42>",
+     "<extra_id_43>",
+     "<extra_id_44>",
+     "<extra_id_45>",
+     "<extra_id_46>",
+     "<extra_id_47>",
+     "<extra_id_48>",
+     "<extra_id_49>",
+     "<extra_id_50>",
+     "<extra_id_51>",
+     "<extra_id_52>",
+     "<extra_id_53>",
+     "<extra_id_54>",
+     "<extra_id_55>",
+     "<extra_id_56>",
+     "<extra_id_57>",
+     "<extra_id_58>",
+     "<extra_id_59>",
+     "<extra_id_60>",
+     "<extra_id_61>",
+     "<extra_id_62>",
+     "<extra_id_63>",
+     "<extra_id_64>",
+     "<extra_id_65>",
+     "<extra_id_66>",
+     "<extra_id_67>",
+     "<extra_id_68>",
+     "<extra_id_69>",
+     "<extra_id_70>",
+     "<extra_id_71>",
+     "<extra_id_72>",
+     "<extra_id_73>",
+     "<extra_id_74>",
+     "<extra_id_75>",
+     "<extra_id_76>",
+     "<extra_id_77>",
+     "<extra_id_78>",
+     "<extra_id_79>",
+     "<extra_id_80>",
+     "<extra_id_81>",
+     "<extra_id_82>",
+     "<extra_id_83>",
+     "<extra_id_84>",
+     "<extra_id_85>",
+     "<extra_id_86>",
+     "<extra_id_87>",
+     "<extra_id_88>",
+     "<extra_id_89>",
+     "<extra_id_90>",
+     "<extra_id_91>",
+     "<extra_id_92>",
+     "<extra_id_93>",
+     "<extra_id_94>",
+     "<extra_id_95>",
+     "<extra_id_96>",
+     "<extra_id_97>",
+     "<extra_id_98>",
+     "<extra_id_99>"
+   ],
+   "clean_up_tokenization_spaces": true,
+   "eos_token": "</s>",
+   "extra_ids": 100,
+   "model_max_length": 512,
+   "name_or_path": "cssupport/t5-small-awesome-text-to-sql",
+   "pad_token": "<pad>",
+   "sp_model_kwargs": {},
+   "special_tokens_map_file": null,
+   "tokenizer_class": "T5Tokenizer",
+   "unk_token": "<unk>"
+ }
requirements.txt ADDED
@@ -0,0 +1,15 @@
+ numpy==2.2.5
+ pandas==2.2.3
+ torch==2.7.0
+ transformers==4.20.0
+ datasets==3.6.0
+ scikit-learn==1.5.0
+ matplotlib==3.10.3
+ gradio==5.29.1
+ huggingface-hub==0.31.2
+ sentencepiece==0.2.0
+ tokenizers==0.12.1
+ tqdm==4.67.1
+ pillow==11.2.1
+ aiohttp==3.11.18
+ sqlparse==0.5.0
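
To run the app locally with these pins (assuming the model folders above have been fetched via git-lfs), something like:

    pip install -r requirements.txt
    python app.py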