Spaces:

a-ghorbani
/

ai-phone-leaderboard

Running

App Files Files Community

agh123 committed on Apr 6

Commit

157b914

1 Parent(s): 915fcba

chore: filter out anomalies (z_threshold=9.0)

Browse files

Files changed (2) hide show

src/services/firebase.py +9 -1
src/utils/anomaly.py +146 -0

src/services/firebase.py CHANGED Viewed

@@ -5,6 +5,8 @@ import pandas as pd
 import streamlit as st
 import json
 # Import the device lookup function
 from ..utils.device_lookup import get_device_name
@@ -91,6 +93,7 @@ def format_leaderboard_data(submissions: List[dict]) -> pd.DataFrame:
             formatted_data.append(
                 {
                     "Device": device_name,  # Use normalized device name
                     "Device ID": device_id,
                     "Platform": device_info.get("systemName", "Unknown"),
@@ -157,7 +160,12 @@ def format_leaderboard_data(submissions: List[dict]) -> pd.DataFrame:
             st.warning(f"Error processing submission: {str(e)}")
             continue
-    return pd.DataFrame(formatted_data)
 async def fetch_leaderboard_data(

 import streamlit as st
 import json
+from src.utils.anomaly import filter_anomalies
 # Import the device lookup function
 from ..utils.device_lookup import get_device_name
             formatted_data.append(
                 {
+                    "Submission ID": benchmark_result.get("uuid", "Unknown"),
                     "Device": device_name,  # Use normalized device name
                     "Device ID": device_id,
                     "Platform": device_info.get("systemName", "Unknown"),
             st.warning(f"Error processing submission: {str(e)}")
             continue
+    formatted_df = pd.DataFrame(formatted_data)
+    filtered_df, anomalies = filter_anomalies(
+        formatted_df, z_threshold=9.0, min_samples=5
+    )
+    print("Anomalies: ", anomalies)
+    return filtered_df
 async def fetch_leaderboard_data(

src/utils/anomaly.py ADDED Viewed

	@@ -0,0 +1,146 @@

+import pandas as pd
def add_model_size_groups(df, group_size=0.5, max_size=15):
    """
    Add a 'Size Group' column that buckets each model by its file size in GB.

    Buckets are the intervals ``[k * group_size, (k + 1) * group_size)`` and
    are labelled like ``"0.5-1.0 GB"``. Sizes at or above ``max_size`` share
    the single label ``">{max_size} GB"``; missing sizes get ``"Unknown"``.

    Args:
        df (pandas.DataFrame): Benchmark data containing either a
            'Model Size GB' column or a 'Model File Size' column. The latter
            is divided by 1024**3 to derive 'Model Size GB'.
        group_size (float): Width of each bucket in GB (default: 0.5).
            Must be positive.
        max_size (int): Sizes at or above this many GB fall into the
            overflow bucket (default: 15).

    Returns:
        pandas.DataFrame: A copy of ``df`` with a 'Size Group' column added
        (plus 'Model Size GB' when it had to be derived). ``None`` or an
        empty DataFrame is returned as-is.

    Raises:
        ValueError: If neither size column is present, or if ``group_size``
            is not positive.
    """
    # Imported once per call instead of once per row inside the helper below.
    import math

    if df is None or df.empty:
        return df

    # A zero width would otherwise surface as a ZeroDivisionError from deep
    # inside ``apply``; a negative width would produce inverted labels.
    if group_size <= 0:
        raise ValueError("group_size must be a positive number")

    result_df = df.copy()  # never mutate the caller's frame

    if "Model Size GB" not in result_df.columns:
        if "Model File Size" not in result_df.columns:
            raise ValueError("DataFrame must contain 'Model File Size' column")
        result_df["Model Size GB"] = result_df["Model File Size"] / 1024**3

    def assign_size_group(size):
        """Return the bucket label for a single size value in GB."""
        if size is None or pd.isna(size):
            return "Unknown"
        if size >= max_size:
            return f">{max_size} GB"

        group_index = math.floor(size / group_size)
        # Rounding only strips float noise (e.g. 3 * 0.1 -> 0.3). Six decimals
        # keeps widths such as 0.25 intact, whereas one decimal would turn the
        # bound 0.25 into 0.2. Labels for the default width are unaffected.
        lower_bound = round(group_index * group_size, 6)
        upper_bound = round((group_index + 1) * group_size, 6)
        return f"{lower_bound}-{upper_bound} GB"

    result_df["Size Group"] = result_df["Model Size GB"].apply(assign_size_group)
    return result_df
def detect_anomalies(df, z_threshold=6.0, min_samples=5):
    """
    Detect benchmark results that are statistical outliers within their size group.

    For each of the 'Prompt Processing' and 'Token Generation' metrics, rows
    are grouped by 'Size Group' and a row is flagged when the absolute z-score
    of its metric value, relative to that group's mean and sample standard
    deviation, exceeds ``z_threshold``.

    Args:
        df (pd.DataFrame): Benchmark data. Must contain the two metric columns
            plus 'Model', 'Device', 'Device ID', 'Platform', 'Benchmark' and
            'Submission ID', which are copied into the result. If
            'Size Group' is absent it is derived with add_model_size_groups.
        z_threshold (float): Z-score threshold for anomaly detection (default: 6.0)
        min_samples (int): Minimum number of non-missing metric values a size
            group needs before its statistics are used (default: 5).

    Returns:
        pd.DataFrame: One row per (flagged row, metric) with the columns
        'Size Group', 'Model', 'Device', 'Device ID', 'Platform', 'Metric',
        'Value', 'Mean', 'Std', 'Z-Score', 'Times Faster/Slower', 'Benchmark'
        and 'Submission ID', sorted by 'Z-Score' descending. Empty when
        ``df`` is ``None``/empty or nothing is flagged.
    """
    if df is None or df.empty:
        return pd.DataFrame()

    # Ensure we have Size Group column
    if "Size Group" not in df.columns:
        df = add_model_size_groups(df)

    # The grouping is the same for every metric, so build it once.
    size_groups = df.groupby("Size Group")

    anomalies = []
    for metric in ("Prompt Processing", "Token Generation"):
        for size_group, group_df in size_groups:
            values = group_df[metric]

            # Count only real measurements: rows whose metric is missing do
            # not contribute to the mean/std, so they must not help a group
            # reach the minimum sample size either.
            if values.count() < min_samples:
                continue

            mean_value = values.mean()
            std_value = values.std()

            # Skip groups with no usable spread. NaN has to be tested
            # explicitly because ``nan < 0.001`` evaluates to False.
            if pd.isna(std_value) or std_value < 0.001:
                continue

            # Vectorised z-scores; comparisons against NaN are False, so rows
            # with a missing metric value are never flagged.
            z_scores = ((values - mean_value) / std_value).abs()
            is_outlier = (z_scores > z_threshold).to_numpy()
            if not is_outlier.any():
                continue

            # Only the (typically few) flagged rows are visited one by one.
            flagged_rows = group_df[is_outlier].iterrows()
            for (_, row), z_score in zip(flagged_rows, z_scores[is_outlier]):
                value = row[metric]
                anomalies.append(
                    {
                        "Size Group": size_group,
                        "Model": row["Model"],
                        "Device": row["Device"],
                        "Device ID": row["Device ID"],
                        "Platform": row["Platform"],
                        "Metric": metric,
                        "Value": value,
                        "Mean": mean_value,
                        "Std": std_value,
                        "Z-Score": z_score,
                        # Ratio of this value to the group mean.
                        "Times Faster/Slower": value / mean_value,
                        "Benchmark": row["Benchmark"],
                        "Submission ID": row["Submission ID"],
                    }
                )

    anomaly_df = pd.DataFrame(anomalies)
    if not anomaly_df.empty:
        anomaly_df = anomaly_df.sort_values(by="Z-Score", ascending=False)
    return anomaly_df
def filter_anomalies(df, z_threshold=9.0, min_samples=5):
    """
    Filter out anomalies from a DataFrame.

    Anomalies are found with detect_anomalies. A row of ``df`` is removed when
    its 'Submission ID' together with its value for the flagged metric matches
    a detected anomaly. Matching on the pair rather than on the ID alone keeps
    unrelated rows that merely share an ID with an anomalous row (for example
    a placeholder ID used when the real one is missing).

    Args:
        df (pd.DataFrame): DataFrame containing benchmark data. Must contain a
            'Submission ID' column and the metric columns inspected by
            detect_anomalies.
        z_threshold (float): Z-score threshold for anomaly detection (default: 9.0)
        min_samples (int): Minimum number of samples needed for a group to calculate statistics

    Returns:
        tuple: (filtered_df, anomalies_df) - the filtered DataFrame without
        anomalies and the anomalies DataFrame. When ``df`` is ``None``/empty
        or no anomalies are found, ``df`` itself is returned as the first
        element.
    """
    if df is None or df.empty:
        return df, pd.DataFrame()

    # Find anomalies
    anomalies = detect_anomalies(
        df, z_threshold=z_threshold, min_samples=min_samples
    )
    if anomalies.empty:
        return df, anomalies

    submission_ids = df["Submission ID"].tolist()
    drop_row = [False] * len(df)

    # Each anomaly records the metric it was flagged for and the offending
    # value, so the exact source rows can be identified per metric.
    for metric, metric_anomalies in anomalies.groupby("Metric"):
        flagged = set(
            zip(metric_anomalies["Submission ID"], metric_anomalies["Value"])
        )
        metric_values = df[metric].tolist()
        for position, key in enumerate(zip(submission_ids, metric_values)):
            if key in flagged:
                drop_row[position] = True

    # A positional boolean mask works regardless of what the index looks like.
    keep_row = [not dropped for dropped in drop_row]
    filtered_df = df[keep_row]
    return filtered_df, anomalies