Spaces:

tlogandesigns
/

image-text-compliance

Sleeping

App Files Files

xet

Community

tlogandesigns committed on Aug 19

Commit

0823edb

1 Parent(s): 5a92002

ML_POSITIVE_LABELS

Browse files

Files changed (1) hide show

checker.py +45 -10

checker.py CHANGED Viewed

@@ -1,6 +1,3 @@
-"""
-checker.py — core logic for Image + Text Compliance Check
-"""
 from __future__ import annotations
 from pathlib import Path
 from typing import List, Optional, Dict, Any, Iterable, Union, Tuple
@@ -40,6 +37,12 @@ USE_TINY_ML = os.getenv("USE_TINY_ML", "1") == "1"
 HF_REPO = os.getenv("HF_REPO", "tlogandesigns/fairhousing-bert-tiny")
 HF_THRESH = float(os.getenv("HF_THRESH", "0.75"))
 BASE_DIR = Path(__file__).parent
 PHRASES_PATH = Path(os.getenv("PHRASES_PATH", str(BASE_DIR / "phrases.yaml")))
@@ -93,14 +96,12 @@ def contains_disclaimer(text: str, disclaimer: str) -> bool:
     return squeeze(disclaimer) in squeeze(text)
 @dataclass
 class Rule:
     """One phrase-matching rule; instances are collected in PHRASE_RULES."""

     # Compiled pattern for this rule.
     regex: re.Pattern
     # Category name; fair_housing_flags appends it to its flag list and
     # run_check reports the sorted set of categories.
     category: str
     # List of strings attached to the rule -- presumably suggested
     # replacement wording; TODO confirm against the phrases loader.
     suggests: list[str]
 PHRASE_RULES: list[Rule] = []
 PHRASES_ERROR: Optional[str] = None
@@ -173,6 +174,33 @@ if USE_TINY_ML:
         )
 def fair_housing_flags(text: str) -> List[str]:
     flags: List[str] = []
     t = (text or "")[:1500]
@@ -185,11 +213,9 @@ def fair_housing_flags(text: str) -> List[str]:
                 flags.append(rule.category)
     if hf_pipe:
         try:
-            pred = hf_pipe(t)
-            lbl = pred[0]["label"]
-            score = float(pred[0]["score"])
-            if (lbl in ("1", "LABEL_1", "violation", "POSITIVE")) and score >= HF_THRESH:
-                flags.append(f"MLFlag: model={HF_REPO} label={lbl} score={score:.2f}")
         except Exception as e:
             flags.append(f"MLFlag: inference error: {e}")
     return flags
@@ -353,6 +379,13 @@ def run_check(
     )
     img_findings, img_spans = find_rule_matches(itxt)
     ptxt_findings, ptxt_spans = find_rule_matches(ptxt)
     results = {
         "Fair_Housing": fair_housing_block,
         "img": img_block,
@@ -371,6 +404,8 @@ def run_check(
             "OCR": pytesseract is not None,
             "Categories": sorted({r.category for r in PHRASE_RULES}),
             "DisclaimerRequiredOnNonSocial": REQUIRE_DISCLAIMER_ON_NON_SOCIAL,
         },
     }
     send_email_notification(results)

 from __future__ import annotations
 from pathlib import Path
 from typing import List, Optional, Dict, Any, Iterable, Union, Tuple
 HF_REPO = os.getenv("HF_REPO", "tlogandesigns/fairhousing-bert-tiny")
 HF_THRESH = float(os.getenv("HF_THRESH", "0.75"))
+# Lower-cased classifier label names that count as a "violation" prediction.
+# Read from the comma-separated ML_POSITIVE_LABELS environment variable;
+# whitespace around each entry is trimmed and empty entries are dropped.
+# _violation_score lower-cases model labels before comparing against this set.
+ML_POSITIVE_LABELS = {
+    s.strip().lower()
+    for s in re.split(r"\s*,\s*", os.getenv("ML_POSITIVE_LABELS", "Potential Violation,violation,positive,LABEL_1,1"))
+    if s.strip()
+}
 BASE_DIR = Path(__file__).parent
 PHRASES_PATH = Path(os.getenv("PHRASES_PATH", str(BASE_DIR / "phrases.yaml")))
     return squeeze(disclaimer) in squeeze(text)
 @dataclass
 class Rule:
     """One phrase-matching rule; instances are collected in PHRASE_RULES."""

     # Compiled pattern for this rule.
     regex: re.Pattern
     # Category name; fair_housing_flags appends it to its flag list and
     # run_check reports the sorted set of categories.
     category: str
     # List of strings attached to the rule -- presumably suggested
     # replacement wording; TODO confirm against the phrases loader.
     suggests: list[str]
 PHRASE_RULES: list[Rule] = []
 PHRASES_ERROR: Optional[str] = None
         )
def _violation_score(pipe, text: str) -> float:
    """Estimate how strongly ``pipe`` rates ``text`` as a violation.

    The pipeline is first asked for the score of every label
    (``return_all_scores=True``). If the call or the parsing of its result
    raises ``TypeError`` (for example because the keyword is not accepted),
    the pipeline is called again without the keyword and only its first
    (top-1) prediction is used.

    Args:
        pipe: Callable text classifier returning dicts with ``"label"`` and
            ``"score"`` keys -- presumably a Hugging Face text-classification
            pipeline; confirm against where ``hf_pipe`` is built.
        text: Text to classify.

    Returns:
        A score for the violation class. ``0.0`` when the all-scores call
        fails with anything other than ``TypeError`` or when no prediction is
        returned.

    Raises:
        Exception: Whatever the fallback ``pipe(text)`` call raises; the
            caller is expected to report it.
    """
    try:
        preds = pipe(text, return_all_scores=True)
        # The per-label dicts may arrive wrapped in an outer list
        # ([[{...}, ...]]) or as a flat list ([{...}, ...]); accept both.
        rows = preds[0] if preds and isinstance(preds[0], list) else preds
        scores = {str(d["label"]).lower(): float(d["score"]) for d in rows}
    except TypeError:
        preds = pipe(text)
        if not (isinstance(preds, list) and preds):
            return 0.0
        top = preds[0]
        label = str(top.get("label", "")).lower()
        score = float(top.get("score", 0.0))
        if label in ML_POSITIVE_LABELS:
            return score
        # The top prediction is NOT a positive label, so its confidence must
        # not be reported as the violation score. The complement is exact for
        # a two-class model and an upper bound otherwise.
        return 1.0 - score
    except Exception:
        # Best effort: a failed all-scores inference yields "no signal".
        return 0.0

    # Exact match against the configured positive labels. Taking the max keeps
    # the result independent of set iteration order when several are present.
    positive = [scores[name] for name in ML_POSITIVE_LABELS if name in scores]
    if positive:
        return max(positive)

    # Only the negative class is recognisable: use its complement.
    if "non-violation" in scores:
        return 1.0 - scores["non-violation"]

    # Heuristic: any label whose name hints at a positive/unsafe class.
    hints = ("violat", "posit", "flag", "risk", "unsafe", "toxic")
    candidates = [v for k, v in scores.items() if any(tok in k for tok in hints)]
    if candidates:
        return max(candidates)

    # Unknown label scheme: fall back to the most confident label.
    return max(scores.values()) if scores else 0.0
 def fair_housing_flags(text: str) -> List[str]:
     flags: List[str] = []
     t = (text or "")[:1500]
                 flags.append(rule.category)
     if hf_pipe:
         try:
+            score = _violation_score(hf_pipe, t)
+            if score >= HF_THRESH:
+                flags.append(f"MLFlag: model={HF_REPO} score={score:.2f}")
         except Exception as e:
             flags.append(f"MLFlag: inference error: {e}")
     return flags
     )
     img_findings, img_spans = find_rule_matches(itxt)
     ptxt_findings, ptxt_spans = find_rule_matches(ptxt)
+    model_labels = []
+    try:
+        if hf_pipe is not None and hasattr(hf_pipe, "model") and hasattr(hf_pipe.model, "config"):
+            labels_map = getattr(hf_pipe.model.config, "id2label", {}) or {}
+            model_labels = list(labels_map.values())
+    except Exception:
+        model_labels = []
     results = {
         "Fair_Housing": fair_housing_block,
         "img": img_block,
             "OCR": pytesseract is not None,
             "Categories": sorted({r.category for r in PHRASE_RULES}),
             "DisclaimerRequiredOnNonSocial": REQUIRE_DISCLAIMER_ON_NON_SOCIAL,
+            "ModelLabels": model_labels,
+            "MLPositiveLabels": sorted(list(ML_POSITIVE_LABELS)),
         },
     }
     send_email_notification(results)