Spaces:

gabrielchua
/

refactored-guacamole

Running

App Files Files Community

gabrielchua committed on 12 days ago

Commit

27a346a

unverified ·

1 Parent(s): c657583

update repo

Browse files

Files changed (8) hide show

.gitignore +167 -0
README.md +0 -12
app.py +261 -135
download_model.py +75 -0
lionguard2.py +170 -0
model.joblib +0 -3
requirements.txt +6 -8
utils.py +44 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,167 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+.python-version
+# pipenv
+Pipfile.lock
+# poetry
+poetry.lock
+# PDM
+pdm.lock
+__pypackages__/
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype
+.pytype/
+# Cython debug symbols
+cython_debug/
+# VS Code
+.vscode/
+# Mac
+.DS_Store
+# Model files and large data
+*.safetensors
+*.pt
+*.pth
+*.ckpt
+*.onnx
+*.h5
+*.bin
+*.npy
+*.npz
+*.tar
+*.tar.*
+*.zip
+*.gz
+*.bz2
+*.xz
+*.zst
+*.joblib
+*.pickle
+*.pkl
+*.msgpack
+*.arrow
+*.parquet
+*.tflite
+*.wasm
+*.mlmodel
+*.ftz
+*.rar
+*.7z
+# LFS cache and pointers
+*.lfs.*
+saved_model/**/*
+*tfevents*
+# Cache
+.cache/
+.cache/*
+# Environment
+.env
+.env.*
+.venv/
+venv/
+ENV/
+env/
+env.bak/
+venv.bak/
+# Gradio
+gradio_cached_examples/
+gradio_cache/
+.gradio/
+# Cache
+cache/

README.md DELETED Viewed

@@ -1,12 +0,0 @@
----
-title: Refactored Guacamole
-emoji: 📚
-colorFrom: yellow
-colorTo: pink
-sdk: gradio
-sdk_version: 5.20.1
-app_file: app.py
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -1,170 +1,296 @@
 import os
 import gradio as gr
-import joblib
-import numpy as np
-import pandas as pd
-from openai import OpenAI
-from typing import List, Dict, Any
-# --- New Inference Code Components ---
-# Define categories with sub-level information
-CATEGORIES = {
-    'hateful': ['hateful_lvl_1_discriminatory', 'hateful_lvl_2_hate_speech'],
-    'insults': ['insults'],
-    'sexual': ['sexual_lvl_1_not_appropriate_for_minors', 'sexual_lvl_2_not_appropriate_for_all'],
-    'physical_violence': ['physical_violence'],
-    'self_harm': ['self_harm_lvl_1_intent', 'self_harm_lvl_2_action'],
-    'all_other_misconduct': ['all_other_misconduct_lvl_1_not_socially_accepted', 'all_other_misconduct_lvl_2_illegal']
-}
-def get_embeddings(texts: List[str], model: str = "text-embedding-3-large") -> np.ndarray:
     """
-    Generate embeddings for a list of texts using the OpenAI API synchronously.
     Args:
-        texts: List of strings to embed.
-        model: The OpenAI embedding model to use.
     Returns:
-        A numpy array of embeddings.
     """
-    client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
-    MAX_TOKENS = 8191  # Maximum tokens for the embedding model
-    truncated_texts = [text[:MAX_TOKENS] for text in texts]
-    response = client.embeddings.create(
-        input=truncated_texts,
-        model=model
     )
-    embeddings = np.array([data.embedding for data in response.data])
-    return embeddings
-def run_model(model_file: str, embeddings: np.ndarray):
     """
-    Run the model on the embeddings.
     Args:
-        model_file: Path to the model file.
-        embeddings: Numpy array of embeddings.
-    Returns:
-        expanded_predictions, expanded_probabilities, expanded_label_names
     """
-    print("Loading model...")
-    model_data = joblib.load(model_file)
-    model = model_data['model']
-    label_names = model_data['label_names']
-    print("Predicting...")
-    # raw_predictions is a list of arrays with shape (n_samples, 2)
-    raw_predictions = model.predict(embeddings)
-    print("Processing predictions...")
-    predictions = []
-    probabilities = []
-    # Process each category's raw predictions
-    for i, pred in enumerate(raw_predictions):
-        # Convert raw predictions (P(y>0), P(y>1)) into a class from {0, 1, 2}
-        pred_class = np.zeros(len(pred))
-        pred_class += (pred[:, 0] > 0.5).astype(int)  # y > 0
-        pred_class += (pred[:, 1] > 0.5).astype(int)  # y > 1
-        predictions.append(pred_class)
-        # Calculate probabilities for each class:
-        # P(y=0) = 1 - P(y>0), P(y=1) = P(y>0) - P(y>1), P(y=2) = P(y>1)
-        prob = np.zeros((len(pred), 3))
-        prob[:, 0] = 1 - pred[:, 0]
-        prob[:, 1] = pred[:, 0] - pred[:, 1]
-        prob[:, 2] = pred[:, 1]
-        probabilities.append(prob)
-    predictions = np.array(predictions).T
-    probabilities = np.array(probabilities).transpose(1, 0, 2)
-    # Expand predictions to sub-levels
-    expanded_predictions = []
-    expanded_probabilities = []
-    expanded_label_names = []
-    for i, cat in enumerate(label_names):
-        # Level 1 binary
-        y_pred_l1 = (predictions[:, i] > 0).astype(int) # y == 1 or y == 2
-        y_proba_l1 = 1 - probabilities[:, i, 0]  # 1 - P(class 0)
-        # Level 2 binary
-        y_pred_l2 = (predictions[:, i] == 2).astype(int) # only y == 2
-        y_proba_l2 = probabilities[:, i, 2]  # Probability of class 2
-        if cat in ['binary', 'insults', 'physical_violence']:
-            expanded_predictions.append(y_pred_l1)
-            expanded_probabilities.append(y_proba_l1)
-            expanded_label_names.append(cat)
-        else:
-            expanded_predictions.append(y_pred_l1)
-            expanded_probabilities.append(y_proba_l1)
-            expanded_label_names.append(CATEGORIES[cat][0])
-            expanded_predictions.append(y_pred_l2)
-            expanded_probabilities.append(y_proba_l2)
-            expanded_label_names.append(CATEGORIES[cat][1])
-    expanded_predictions = np.array(expanded_predictions).T
-    expanded_probabilities = np.array(expanded_probabilities).T
-    return expanded_predictions, expanded_probabilities, expanded_label_names
-def format_output(predictions: np.ndarray, probabilities: np.ndarray, label_names: List[str]) -> pd.DataFrame:
     """
-    Format the output predictions into a DataFrame.
     Args:
-        predictions: Binary predictions.
-        probabilities: Associated prediction scores.
-        label_names: List of label names.
     Returns:
-        DataFrame with columns "Label", "Prediction", and "Score".
     """
-    # As our Gradio interface processes one text at a time, we use the first (and only) sample.
-    data = {
-        "Label": label_names,
-        "Prediction": predictions[0].tolist(),
-        "Score": np.round(probabilities[0], 4).tolist()
-    }
-    return pd.DataFrame(data)
-# --- Gradio App Integration ---
-# Define model file path (adjust as necessary)
-MODEL_FILE = "model.joblib"
-def classify_text(text: str):
     """
-    Given an input text, generates embeddings, runs the model inference,
-    and returns a DataFrame of classification results.
     """
     if not text.strip():
-        # Return an empty DataFrame if no text provided
-        empty_df = pd.DataFrame({"Label": [], "Prediction": [], "Score": []})
-        return gr.update(value=empty_df, visible=True)
-    # Obtain embeddings (input must be a list)
-    embeddings = get_embeddings([text])
-    # Run inference on the embeddings using the new model file
-    predictions, probabilities, label_names = run_model(MODEL_FILE, embeddings)
-    # Format the results to a DataFrame that Gradio can display
-    df = format_output(predictions, probabilities, label_names)
-    return gr.update(value=df, visible=True)
-with gr.Blocks(title="Zoo Entry 001 - Updated Inference") as iface:
-    input_text = gr.Textbox(lines=5, label="Input Text")
-    submit_btn = gr.Button("Submit")
-    output_table = gr.DataFrame(label="Classification Results", visible=False)
-    submit_btn.click(fn=classify_text, inputs=input_text, outputs=output_table)
 if __name__ == "__main__":
-    iface.launch()

+"""
+app.py - Gradio Web App for LionGuard2 Content Moderation
+"""
 import os
 import gradio as gr
+from safetensors.torch import load_file
+from huggingface_hub import hf_hub_download
+# Local imports
+from lionguard2 import LionGuard2, CATEGORIES
+from utils import get_embeddings
+def download_model(repo_id, filename="LionGuard2.safetensors", token=None):
     """
+    Fetch a LionGuard2 weights file from a Hugging Face repository into ./cache.
+    When no token is passed, the HF_API_KEY environment variable is used instead.
     Args:
+        repo_id: The Hugging Face repository ID (e.g., "username/repo-name")
+        filename: The filename to download (default: "LionGuard2.safetensors")
+        token: Hugging Face access token for private repositories
     Returns:
+        Path to the downloaded file, as returned by hf_hub_download
     """
+    access_token = os.environ.get("HF_API_KEY") if token is None else token
+    return hf_hub_download(
+        repo_id=repo_id,
+        filename=filename,
+        token=access_token,
+        cache_dir="./cache"
     )
+def load_model(repo_id=None, use_local=True):
     """
+    Build a LionGuard2 model in eval mode and load its weights from a safetensors file.
+    The local file "LionGuard2.safetensors" is used when it exists and use_local is true.
+    Otherwise, if repo_id is given, the weights are downloaded from that Hugging Face
+    repository, falling back to the local file when the download fails.
     Args:
+        repo_id: The Hugging Face repository ID (optional)
+        use_local: Whether to use local file first (default: True)
+    Returns:
+        The LionGuard2 model with the loaded state dict.
+    Raises:
+        FileNotFoundError: If the download fails and no local model file exists.
     """
+    local_path = "LionGuard2.safetensors"
+    model = LionGuard2()
+    model.eval()
+    model_path = local_path
+    # Try to download from HF repo if specified and local file doesn't exist or use_local is False
+    if repo_id and (not use_local or not os.path.exists(local_path)):
+        try:
+            print(f"Downloading LionGuard2.safetensors from {repo_id}...")
+            model_path = download_model(repo_id)
+            print(f"Model downloaded to: {model_path}")
+        except Exception as e:
+            print(f"Failed to download from HF repo: {e}")
+            if not os.path.exists(local_path):
+                # Chain the download error so its cause stays visible in the traceback.
+                raise FileNotFoundError(
+                    "No local model file found and failed to download from HF repo"
+                ) from e
+            print("Falling back to local file...")
+    state_dict = load_file(model_path)
+    model.load_state_dict(state_dict)
+    return model
+# Load model once at startup. This runs at import time; analyze_text() reads the
+# module-level `model` created here.
+# NOTE(review): the ID below looks like this Space's own repository; confirm it is the
+# repository that actually hosts LionGuard2.safetensors.
+HF_REPO_ID = "gabrielchua/refactored-guacamole"  # Update this with the actual repo ID
+model = load_model(repo_id=HF_REPO_ID)
+def format_score_with_style(score_str, compact=False):
     """
+    Render a score as a colored HTML badge with an emoji and a whole-number percentage.
+    Scores below 0.4 are green, scores from 0.4 up to (but not including) 0.7 are amber,
+    and all other scores are red.
     Args:
+        score_str: Score as string (could be "-" for missing values)
+        compact: Whether to use compact styling (smaller badge padding)
     Returns:
+        HTML formatted string with styling. If the value cannot be converted to a
+        percentage, the input is returned unchanged.
     """
+    if score_str == "-":
+        return '<span style="color: #9ca3af;">-</span>'
+    try:
+        score = float(score_str)
+        # int() truncates, so e.g. 0.999 is displayed as 99%.
+        percentage = int(score * 100)
+    except (TypeError, ValueError, OverflowError):
+        # Non-numeric input, NaN (ValueError) or infinity (OverflowError): show it as-is.
+        return score_str
+    padding = "6px 12px" if compact else "8px 16px"
+    if score < 0.4:
+        # Safe - refined green
+        return f'<span style="background: linear-gradient(135deg, #065f46 0%, #047857 100%); color: #34d399; padding: {padding}; border-radius: 8px; font-size: 13px; font-weight: 600; border: 1px solid #10b981; box-shadow: 0 2px 4px rgba(0,0,0,0.2);">👌 {percentage}%</span>'
+    if score < 0.7:
+        # Warning - refined amber
+        return f'<span style="background: linear-gradient(135deg, #92400e 0%, #b45309 100%); color: #fbbf24; padding: {padding}; border-radius: 8px; font-size: 13px; font-weight: 600; border: 1px solid #f59e0b; box-shadow: 0 2px 4px rgba(0,0,0,0.2);">⚠️ {percentage}%</span>'
+    # High risk - refined red
+    return f'<span style="background: linear-gradient(135deg, #991b1b 0%, #b91c1c 100%); color: #fca5a5; padding: {padding}; border-radius: 8px; font-size: 13px; font-weight: 600; border: 1px solid #ef4444; box-shadow: 0 2px 4px rgba(0,0,0,0.2);">🚨 {percentage}%</span>'
+def format_binary_score(score):
+    """Return the overall-score banner (Pass / Warning / Fail) as HTML styled for dark mode."""
+    percentage = int(score * 100)
+    # Each band supplies: gradient start, gradient end, text color, border color, label.
+    if score < 0.4:
+        band = ("#065f46", "#047857", "#34d399", "#10b981", "✅ Pass")
+    elif score < 0.7:
+        band = ("#92400e", "#b45309", "#fbbf24", "#f59e0b", "⚠️ Warning")
+    else:
+        band = ("#991b1b", "#b91c1c", "#fca5a5", "#ef4444", "🚨 Fail")
+    grad_start, grad_end, text_color, border_color, label = band
+    return (
+        f'<div style="background: linear-gradient(135deg, {grad_start} 0%, {grad_end} 100%); '
+        f'color: {text_color}; padding: 24px; border-radius: 12px; text-align: center; '
+        f'font-weight: 700; border: 2px solid {border_color}; font-size: 20px; '
+        f'box-shadow: 0 4px 12px rgba(0,0,0,0.3); margin: 16px 0;">{label} ({percentage}/100)</div>'
+    )
+def analyze_text(text):
     """
+    Analyze text for content moderation violations.
+    The text is embedded with get_embeddings(), scored by the module-level model, and the
+    scores are rendered as HTML. Any exception raised along the way is caught and shown
+    as an error banner in both outputs instead of propagating.
+    Args:
+        text: Input text to analyze
+    Returns:
+        binary_score: Overall safety score with styling
+        category_table: HTML table with category-specific scores and styling
     """
+    import html  # local import: only needed to escape exception text in the error banner
     if not text.strip():
+        empty_html = '<div style="text-align: center; color: #9ca3af; padding: 30px; font-style: italic;">Enter text to analyze</div>'
+        return empty_html, empty_html
+    try:
+        # Get embeddings for the text
+        embeddings = get_embeddings([text])
+        # Run inference
+        results = model.predict(embeddings)
+        # Extract binary score (overall safety)
+        binary_score = results.get('binary', [0.0])[0]
+        # Define the main categories (excluding binary)
+        main_categories = ['hateful', 'insults', 'sexual', 'physical_violence', 'self_harm', 'all_other_misconduct']
+        # Emoji shown next to each category name; built once rather than per loop iteration
+        category_emojis = {
+            'Hateful': '🤬',
+            'Insults': '💢',
+            'Sexual': '🔞',
+            'Physical Violence': '⚔️',
+            'Self Harm': '☹️',
+            'All Other Misconduct': '🙅‍♀️'
+        }
+        categories_html = []
+        for category in main_categories:
+            category_name = category.replace('_', ' ').title()
+            category_display = f"{category_emojis.get(category_name, '📝')} {category_name}"
+            # Only the highest score across the category's levels is displayed
+            max_score = max(
+                (results.get(subcategory_key, [0.0])[0] for subcategory_key in CATEGORIES[category]),
+                default=0.0,
+            )
+            # Create the row HTML - just show max score
+            categories_html.append(f'''
+            <tr style="border-bottom: 1px solid #374151; transition: background-color 0.2s ease;">
+                <td style="padding: 16px; font-weight: 500; color: #f9fafb; font-size: 15px;">{category_display}</td>
+                <td style="padding: 16px; text-align: center;">{format_score_with_style(f"{max_score:.4f}")}</td>
+            </tr>
+            ''')
+        # Create refined HTML table for dark mode
+        html_table = f'''
+        <div style="margin: 24px 0;">
+            <div style="margin-bottom: 20px; text-align: center;">
+                <h2 style="color: #f9fafb; font-size: 20px; font-weight: 600; margin-bottom: 6px;">📊 Category-Specific Scores</h2>
+            </div>
+            <div style="background: #1f2937; border-radius: 12px; overflow: hidden; box-shadow: 0 4px 12px rgba(0,0,0,0.3); border: 1px solid #374151;">
+                <table style="width: 100%; border-collapse: collapse;">
+                    <thead>
+                        <tr style="background: linear-gradient(135deg, #374151 0%, #4b5563 100%);">
+                            <th style="padding: 16px; text-align: left; font-weight: 600; font-size: 15px; color: #f9fafb;">Category</th>
+                            <th style="padding: 16px; text-align: center; font-weight: 600; font-size: 15px; color: #f9fafb;">Score</th>
+                        </tr>
+                    </thead>
+                    <tbody>
+                        {"".join(categories_html)}
+                    </tbody>
+                </table>
+            </div>
+        </div>
+        '''
+        return format_binary_score(binary_score), html_table
+    except Exception as e:
+        # Exception text can contain arbitrary characters; escape it before embedding in HTML
+        error_msg = f"Error analyzing text: {html.escape(str(e))}"
+        error_html = f'<div style="background: linear-gradient(135deg, #991b1b 0%, #b91c1c 100%); color: #fca5a5; padding: 20px; border-radius: 12px; text-align: center; border: 2px solid #ef4444; box-shadow: 0 4px 12px rgba(0,0,0,0.3);">❌ {error_msg}</div>'
+        return f'<div style="background: linear-gradient(135deg, #991b1b 0%, #b91c1c 100%); color: #fca5a5; padding: 16px; border-radius: 8px; text-align: center; border: 1px solid #ef4444;">❌ {error_msg}</div>', error_html
+# Create Gradio interface with dark theme.
+# Layout: a header, then a row with the text input and button on the left and the two
+# HTML result panels on the right, followed by a collapsed accordion explaining the scores.
+with gr.Blocks(title="LionGuard2", theme=gr.themes.Base().set(
+    body_background_fill="*neutral_950",
+    background_fill_primary="*neutral_900",
+    background_fill_secondary="*neutral_800",
+    border_color_primary="*neutral_700",
+    color_accent_soft="*blue_500"
+)) as demo:
+    gr.HTML("""
+    <div style="text-align: center; margin-bottom: 40px; padding: 20px;">
+        <h1 style="color: #f9fafb; font-size: 36px; font-weight: 700; margin-bottom: 12px; text-shadow: 0 2px 4px rgba(0,0,0,0.3);">🦁 LionGuard2</h1>
+        <p style="color: #d1d5db; font-size: 16px; font-weight: 400; margin: 0;">Detect safety violations, and localised to Singapore</p>
+    </div>
+    """)
+    with gr.Row():
+        with gr.Column(scale=1, min_width=400):
+            text_input = gr.Textbox(
+                label="Enter text to analyze:",
+                placeholder="Type your text here...",
+                lines=12,
+                max_lines=20,
+                container=True
+            )
+            analyze_btn = gr.Button("🔍 Analyze Text", variant="primary")
+        with gr.Column(scale=1, min_width=400):
+            gr.HTML("""
+            <div style="margin-bottom: 24px; text-align: center;">
+                <h2 style="color: #f9fafb; font-size: 22px; font-weight: 600; margin-bottom: 8px;">Overall Safety Score</h2>
+                <p style="color: #d1d5db; font-size: 14px; margin: 0; opacity: 0.8;">Higher percentages indicate higher likelihood of harmful content</p>
+            </div>
+            """)
+            # Placeholders shown until the first analysis; analyze_text() replaces both.
+            binary_output = gr.HTML(
+                value='<div style="text-align: center; color: #9ca3af; padding: 30px; font-style: italic;">Enter text to analyze</div>'
+            )
+            category_table = gr.HTML(
+                value='<div style="text-align: center; color: #9ca3af; padding: 30px; font-style: italic;">Category scores will appear here after analysis</div>'
+            )
+    # Add information about the categories
+    with gr.Row():
+        with gr.Accordion("ℹ️ About the Scoring System", open=False):
+            gr.HTML("""
+            <div style="font-size: 14px; line-height: 1.6; color: #f3f4f6; padding: 10px;">
+            <h3 style="color: #f9fafb; margin-bottom: 16px;">How Scoring Works:</h3>
+            <ul style="color: #d1d5db; margin-bottom: 24px;">
+                <li><b>Percentages represent likelihood of harmful content</b> - Higher % = More likely to be harmful</li>
+                <li><b>0-40%:</b> Content appears safe</li>
+                <li><b>40-70%:</b> Potentially concerning content that warrants review</li>
+                <li><b>70-100%:</b> High likelihood of policy violation</li>
+            </ul>
+            <h3 style="color: #f9fafb; margin-bottom: 16px;">Content Categories (Singapore Context):</h3>
+            <ul style="color: #d1d5db;">
+                <li><b>🤬 Hateful:</b> Content targeting Singapore's protected traits (e.g., race, religion), including discriminatory remarks and explicit calls for harm/violence.</li>
+                <li><b>💢 Insults:</b> Personal attacks on non-protected attributes (e.g., appearance). Note: Sexuality attacks are classified as insults, not hateful, in Singapore.</li>
+                <li><b>🔞 Sexual:</b> Sexual content or adult themes, ranging from mild content inappropriate for minors to explicit content inappropriate for general audiences.</li>
+                <li><b>⚔️ Physical Violence:</b> Threats, descriptions, or glorification of physical harm against individuals or groups (not property damage).</li>
+                <li><b>☹️ Self Harm:</b> Content about self-harm or suicide, including ideation, encouragement, or descriptions of ongoing actions.</li>
+                <li><b>🙅‍♀️ All Other Misconduct:</b> Unethical/criminal conduct not covered above, from socially condemned behavior to clearly illegal activities under Singapore law.</li>
+            </ul>
+            </div>
+            """)
+    # Connect the analyze button to the function
+    analyze_btn.click(
+        fn=analyze_text,
+        inputs=[text_input],
+        outputs=[binary_output, category_table]
+    )
+    # Also run the analysis on the textbox's submit event (same handler and outputs)
+    text_input.submit(
+        fn=analyze_text,
+        inputs=[text_input],
+        outputs=[binary_output, category_table]
+    )
 if __name__ == "__main__":
+    # Listen on all interfaces, port 7860.
+    # NOTE(review): share=True asks Gradio for a public share link; confirm that is
+    # wanted where this app is deployed.
+    demo.launch(share=True, server_name="0.0.0.0", server_port=7860)

download_model.py ADDED Viewed

	@@ -0,0 +1,75 @@

+#!/usr/bin/env python3
+"""
+download_model.py - Utility script to download LionGuard2 model from Hugging Face
+"""
+import os
+import argparse
+from huggingface_hub import hf_hub_download
+def download_lionguard2(repo_id, filename="LionGuard2.safetensors", token=None, output_dir="./"):
+    """
+    Fetch a LionGuard2 model file from a Hugging Face repository into output_dir.
+    Progress and failures are reported with print(); a failed download does not raise.
+    Args:
+        repo_id: The Hugging Face repository ID (e.g., "username/repo-name")
+        filename: The filename to download (default: "LionGuard2.safetensors")
+        token: Hugging Face access token; when None, the HF_API_KEY environment variable is used
+        output_dir: Directory to save the downloaded file
+    Returns:
+        True if the file was downloaded, False if no token was available or the download failed.
+    """
+    if token is None:
+        env_token = os.environ.get("HF_API_KEY")
+        if not env_token:
+            for line in (
+                "Error: No HF_API_KEY found in environment variables.",
+                "Please set your Hugging Face token:",
+                "export HF_API_KEY=your_token_here",
+            ):
+                print(line)
+            return False
+        token = env_token
+    try:
+        print(f"Downloading {filename} from {repo_id}...")
+        # Download the model file
+        saved_to = hf_hub_download(
+            repo_id=repo_id,
+            filename=filename,
+            token=token,
+            local_dir=output_dir,
+            local_dir_use_symlinks=False  # Download actual file, not symlink
+        )
+        print(f"✅ Model successfully downloaded to: {saved_to}")
+    except Exception as e:
+        print(f"❌ Failed to download model: {e}")
+        return False
+    return True
+def main():
+    """
+    Command-line entry point: parse arguments, download the model and report the outcome.
+    Returns:
+        0 if the download succeeded, 1 otherwise, so callers can use it as the exit status.
+    """
+    parser = argparse.ArgumentParser(description="Download LionGuard2 model from Hugging Face")
+    parser.add_argument("repo_id", help="Hugging Face repository ID (e.g., username/repo-name)")
+    parser.add_argument("--filename", default="LionGuard2.safetensors", help="Filename to download")
+    parser.add_argument("--token", help="Hugging Face access token (optional if HF_API_KEY env var is set)")
+    parser.add_argument("--output-dir", default="./", help="Output directory for downloaded file")
+    args = parser.parse_args()
+    success = download_lionguard2(
+        repo_id=args.repo_id,
+        filename=args.filename,
+        token=args.token,
+        output_dir=args.output_dir
+    )
+    if success:
+        print("\n🎉 Ready to use! The model has been downloaded and can now be used by the application.")
+        return 0
+    print("\n💡 Make sure you have:")
+    print("   1. Valid Hugging Face token with access to the private repository")
+    print(f"   2. Correct repository ID: {args.repo_id}")
+    print("   3. The model file exists in the repository")
+    return 1
+if __name__ == "__main__":
+    # Propagate failure to the shell so scripts can detect an unsuccessful download.
+    raise SystemExit(main())

lionguard2.py ADDED Viewed

	@@ -0,0 +1,170 @@

+"""
+lionguard2.py
+"""
+import torch
+import torch.nn as nn
+# Maps each main category (one output head per key, in this order) to the names under
+# which its scores are reported: one name for single-level categories, two (L1, L2)
+# for categories with severity levels.
+CATEGORIES = {
+    "binary": ["binary"],
+    "hateful": ["hateful_l1", "hateful_l2"],
+    "insults": ["insults"],
+    "sexual": [
+        "sexual_l1",
+        "sexual_l2",
+    ],
+    "physical_violence": ["physical_violence"],
+    "self_harm": ["self_harm_l1", "self_harm_l2"],
+    "all_other_misconduct": [
+        "all_other_misconduct_l1",
+        "all_other_misconduct_l2",
+    ],
+}
+INPUT_DIMENSION = 3072  # length of OpenAI text-embedding-3-large embeddings
+class LionGuard2(nn.Module):
+    def __init__(
+        self,
+        input_dim=INPUT_DIMENSION,
+        label_names=CATEGORIES.keys(),
+        categories=CATEGORIES,
+    ):
+        """
+        LionGuard2 is a localised content moderation model that flags whether text violates the following categories:
+        1. `hateful`: Text that discriminates, criticizes, insults, denounces, or dehumanizes a person or group on the basis of a protected identity.
+        There are two sub-categories for the `hateful` category:
+        a. `level_1_discriminatory`: Text that contains derogatory or generalized negative statements targeting a protected group.
+        b. `level_2_hate_speech`: Text that explicitly calls for harm or violence against a protected group; or language praising or justifying violence against them.
+        2. `insults`: Text that insults, demeans, humiliates, mocks, or belittles a person or group **without** referencing a legally protected trait.
+        For example, this includes personal attacks on attributes such as someone’s appearance, intellect, behavior, or other non-protected characteristics.
+        3. `sexual`: Text that depicts or indicates sexual interest, activity, or arousal, using direct or indirect references to body parts, sexual acts, or physical traits.
+        This includes sexual content that may be inappropriate for certain audiences.
+        There are two sub-categories for the `sexual` category:
+        a. `level_1_not_appropriate_for_minors`: Text that contains mild-to-moderate sexual content that is generally adult-oriented or potentially unsuitable for those under 16.
+            May include matter-of-fact discussions about sex, sexuality, or sexual preferences.
+        b. `level_2_not_appropriate_for_all_ages`: Text that contains content aimed at adults and considered explicit, graphic, or otherwise inappropriate for a broad audience.
+            May include explicit descriptions of sexual acts, detailed sexual fantasies, or highly sexualized content.
+        4. `physical_violence`: Text that includes glorification of violence or threats to inflict physical harm or injury on a person, group, or entity.
+        5. `self_harm`: Text that promotes, suggests, or expresses intent to self-harm or commit suicide.
+        There are two sub-categories for the `self_harm` category:
+        a. `level_1_self_harm_intent`: Text that expresses suicidal thoughts or self-harm intention; or content encouraging someone to self-harm.
+        b. `level_2_self_harm_action`: Text that describes or indicates ongoing or imminent self-harm behavior.
+        6. `all_other_misconduct`: This is a catch-all category for any other unsafe text that does not fit into the other categories.
+        It includes text that seeks or provides information about engaging in misconduct, wrongdoing, or criminal activity, or that threatens to harm,
+        defraud, or exploit others. This includes facilitating illegal acts (under Singapore law) or other forms of socially harmful activity.
+        There are two sub-categories for the `all_other_misconduct` category:
+        a. `level_1_not_socially_accepted`: Text that advocates or instructs on unethical/immoral activities that may not necessarily be illegal but are socially condemned.
+        b. `level_2_illegal_activities`: Text that seeks or provides instructions to carry out clearly illegal activities or serious wrongdoing; includes credible threats of severe harm.
+        Lastly, there is an additional `binary` category (#7) which flags whether the text is unsafe in general.
+        The model takes in as input text, after it has been encoded with OpenAI's `text-embedding-3-large` model.
+        The model outputs the probabilities of each category being true.
+        ================================
+        Args:
+            input_dim: The dimension of the input embeddings. This defaults to 3072, which is the dimension of the embeddings from OpenAI's `text-embedding-3-large` model. This should not be changed.
+            label_names: The names of the labels. This defaults to the keys of the CATEGORIES dictionary. This should not be changed.
+            categories: The categories of the labels. This defaults to the CATEGORIES dictionary. This should not be changed.
+        Returns:
+            A LionGuard2 model.
+        """
+        super(LionGuard2, self).__init__()
+        # Keep a list, not a live dict_keys view: the view would follow later changes to
+        # the dictionary and cannot be pickled or deep-copied with the module.
+        self.label_names = list(label_names)
+        self.n_outputs = len(self.label_names)
+        self.categories = categories
+        # Shared layers
+        self.shared_layers = nn.Sequential(
+            nn.Linear(input_dim, 256),
+            nn.ReLU(),
+            nn.Dropout(0.2),
+            nn.Linear(256, 128),
+            nn.ReLU(),
+            nn.Dropout(0.2),
+        )
+        # Output heads for each label
+        self.output_heads = nn.ModuleList(
+            [
+                nn.Sequential(
+                    nn.Linear(128, 32),
+                    nn.ReLU(),
+                    nn.Linear(32, 2),  # 2 thresholds for ordinal classification
+                    nn.Sigmoid(),
+                )
+                for _ in range(self.n_outputs)
+            ]
+        )
+    def forward(self, x):
+        """Return a list with one (N, 2) tensor of sigmoid outputs per label in label_names."""
+        # Pass through shared layers
+        h = self.shared_layers(x)
+        # Pass through each output head
+        return [head(h) for head in self.output_heads]
+    def predict(self, embeddings):
+        """
+        Predict the probabilities of each label being true.
+        The forward pass always runs with dropout disabled and without gradients; the
+        module's training/eval mode is restored afterwards.
+        Args:
+            embeddings: A numpy array of embeddings (N * INPUT_DIMENSION)
+        Returns:
+            A dictionary mapping each sub-category name in `categories` to a list of N
+            probabilities (one per input row).
+        """
+        # Convert input to PyTorch tensor if not already
+        if not isinstance(embeddings, torch.Tensor):
+            x = torch.tensor(embeddings, dtype=torch.float32)
+        else:
+            x = embeddings
+        # Pass through model in eval mode so dropout cannot make predictions random
+        was_training = self.training
+        self.eval()
+        try:
+            with torch.no_grad():
+                outputs = self.forward(x)
+        finally:
+            self.train(was_training)
+        # Stack outputs into a single tensor
+        raw_predictions = torch.stack(outputs)  # shape: (n_outputs, N, 2)
+        # Extract and format probabilities from raw predictions
+        output = {}
+        for i, main_cat in enumerate(self.label_names):
+            sub_categories = self.categories[main_cat]
+            for j, sub_cat in enumerate(sub_categories):
+                # j=0 uses P(y>0)
+                # j=1 uses P(y>1) if L2 category exists
+                output[sub_cat] = raw_predictions[i, :, j]
+            # Post processing step:
+            # If L2 category exists, and P(L2) > P(L1),
+            # Set both P(L1) and P(L2) to their average to maintain ordinal consistency
+            if len(sub_categories) > 1:
+                l1 = output[sub_categories[0]]
+                l2 = output[sub_categories[1]]
+                # Update probabilities on samples where P(L2) > P(L1)
+                mask = l2 > l1
+                mean_prob = (l1 + l2) / 2
+                l1[mask] = mean_prob[mask]
+                l2[mask] = mean_prob[mask]
+                output[sub_categories[0]] = l1
+                output[sub_categories[1]] = l2
+        # .cpu() keeps the conversion working when the model lives on an accelerator
+        return {key: value.cpu().numpy().tolist() for key, value in output.items()}

model.joblib DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6a2f7769a11fe468b2499d08e01ce7522d08a18b0103838404b69210c9b2616c
-size 20552060

requirements.txt CHANGED Viewed

@@ -1,8 +1,6 @@
-keras
-openai
-tensorflow
-joblib
-logfire
-scikit-learn
-pandas
-numpy

+gradio>=4.0.0
+torch>=2.0.0
+safetensors>=0.4.0
+openai>=1.0.0
+numpy>=1.24.0
+huggingface_hub>=0.19.0

utils.py ADDED Viewed

	@@ -0,0 +1,44 @@

+"""
+utils.py
+"""
+# Standard imports
+import os
+from typing import List
+# Third party imports
+import numpy as np
+from openai import OpenAI
+# Shared OpenAI client, created at import time; the API key is read from OPENAI_API_KEY.
+client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
+# Maximum tokens for text-embedding-3-large.
+# We don't have access to the tokenizer for text-embedding-3-large, so the limit is
+# applied as a character count (assuming 1 character = 1 token).
+MAX_TOKENS = 8191
+def get_embeddings(
+    texts: List[str], model: str = "text-embedding-3-large"
+) -> np.ndarray:
+    """
+    Generate embeddings for a list of texts using OpenAI API synchronously.
+    Each text is cut to its first MAX_TOKENS characters before being sent.
+    Args:
+        texts: List of strings to embed.
+        model: OpenAI embedding model to use (default: text-embedding-3-large).
+    Returns:
+        A numpy array built from the returned embeddings, one row per item in the response.
+    Raises:
+        Exception: If the OpenAI API call fails.
+    """
+    # Truncate by characters, used here as a stand-in for the model's token limit
+    truncated_texts = [text[:MAX_TOKENS] for text in texts]
+    # Make the API call
+    response = client.embeddings.create(input=truncated_texts, model=model)
+    # Extract embeddings from response
+    embeddings = np.array([data.embedding for data in response.data])
+    return embeddings