Spaces:

tlogandesigns
/

image-text-compliance

Sleeping

App Files Files Community

tlogandesigns committed on Aug 14

Commit

9da12e6

1 Parent(s): 0dfa3b8

add init

Browse files

Files changed (6) hide show

.DS_Store +0 -0
app.py +112 -0
checker.py +307 -0
packages.txt +1 -0
phrases.yaml +140 -0
requirments.txt +7 -0

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,112 @@

+import os, json, gradio as gr
+from PIL import Image
+import pytesseract
+# --- import your logic ---
+from checker import (
+    evaluate_section,
+    fair_housing_flags,
+    COMPANY_NAME_DEFAULT,
+    COMPANY_PHONES_DEFAULT,
+    DISCLAIMER_DEFAULT,
+    contains_disclaimer,
+    count_name_instances,
+    count_phone_instances,
+)
def _parse_company_phones(raw):
    """Turn the "Company Phones" textbox value into a list of phone strings.

    Accepts a JSON list of strings/numbers, or a single JSON string/number.
    Anything else (invalid JSON, objects, null, booleans, floats) falls back
    to a copy of COMPANY_PHONES_DEFAULT.
    """
    try:
        parsed = json.loads(raw)
    except Exception:
        # Invalid JSON or a non-string value (e.g. None from the UI).
        return list(COMPANY_PHONES_DEFAULT)

    def is_phone_like(value):
        # bool is a subclass of int, so exclude it explicitly.
        return isinstance(value, (str, int)) and not isinstance(value, bool)

    if is_phone_like(parsed):
        return [str(parsed)]
    if isinstance(parsed, list):
        # Stringify so numeric entries such as [7068631775] do not crash
        # the phone normaliser, which works on text.
        return [str(item) for item in parsed if is_phone_like(item)]
    return list(COMPANY_PHONES_DEFAULT)


def run_check(image, ptxt, social, agent_name, agent_phone,
              company_name, company_phones_json, disclaimer):
    """Run OCR and all compliance checks; return the result as pretty JSON.

    Args:
        image: Image handed over by the UI, or None when nothing was uploaded.
        ptxt: Post text typed by the user (may be empty or None).
        social: Whether the content is a social post.
        agent_name: Agent name to count in each section.
        agent_phone: Agent phone to count in each section.
        company_name: Company name to count in each section.
        company_phones_json: JSON text describing the office phone number(s).
        disclaimer: Disclaimer text that social content may be required to contain.

    Returns:
        A JSON string with the keys "Fair_Housing", "img" and "Ptxt", each
        holding {"compliant": bool, "Flags": [str, ...]}.
    """
    # OCR is best-effort: a failure is reported as a flag, not raised.
    itxt = ""
    ocr_err = None
    if image is not None:
        try:
            itxt = pytesseract.image_to_string(image)
        except Exception as e:
            ocr_err = str(e)

    ptxt = ptxt or ""

    # Fair-housing flags are computed once, on the combined content.
    content = "\n\n".join(x for x in (itxt, ptxt, f"Social={social}") if x)
    fh_flags = fair_housing_flags(content)
    fair_housing_block = {"compliant": len(fh_flags) == 0, "Flags": fh_flags}

    company_phones = _parse_company_phones(company_phones_json)

    # Read at call time so the toggle can be changed without a restart.
    require_disclaimer_on_social = os.getenv("REQUIRE_DISCLAIMER_ON_SOCIAL", "1") == "1"

    def eval_section(text):
        # Delegate to the shared implementation instead of duplicating it here.
        return evaluate_section(
            text=text,
            social=bool(social),
            company_name=company_name,
            company_phones=company_phones,
            agent_name=agent_name,
            agent_phone=agent_phone,
            disclaimer=disclaimer,
            require_disclaimer_on_social=require_disclaimer_on_social,
        )

    img_block = eval_section(itxt)
    if ocr_err:
        # The image text could not be read, so it cannot be called compliant.
        img_block["compliant"] = False
        img_block["Flags"].append(f"OCR error: {ocr_err}")
    ptxt_block = eval_section(ptxt)

    payload = {
        "Fair_Housing": fair_housing_block,
        "img": img_block,
        "Ptxt": ptxt_block,
    }
    return json.dumps(payload, indent=2)
# Gradio front-end: declare the widgets, then wire the button to run_check.
# Widgets are created in the order they should appear on the page.
with gr.Blocks(title="Image + Text Compliance Check") as demo:
    gr.Markdown("# Image + Text Compliance Check")

    # Primary inputs: the creative itself.
    with gr.Row():
        image = gr.Image(type="pil", label="Upload image (optional)")
        ptxt = gr.Textbox(lines=8, label="Post Text (Ptxt)")

    # Who the content belongs to, and whether it is a social post.
    with gr.Row():
        social = gr.Checkbox(label="Social", value=False)
        agent_name = gr.Textbox(label="Agent Name", placeholder="e.g., Jane Doe")
        agent_phone = gr.Textbox(label="Agent Phone (digits or formatted)")

    # Company-level settings, pre-filled with the defaults from checker.py.
    with gr.Accordion("Advanced", open=False):
        company_name = gr.Textbox(label="Company Name", value=COMPANY_NAME_DEFAULT)
        company_phones_json = gr.Textbox(
            label="Company Phones (JSON list)",
            value=json.dumps(COMPANY_PHONES_DEFAULT),
        )
        disclaimer = gr.Textbox(label="Disclaimer", value=DISCLAIMER_DEFAULT)

    run_btn = gr.Button("Run Compliance Check")
    out = gr.Code(label="Result JSON", language="json")

    # Listed in the same order as run_check's positional parameters.
    check_inputs = [
        image,
        ptxt,
        social,
        agent_name,
        agent_phone,
        company_name,
        company_phones_json,
        disclaimer,
    ]
    run_btn.click(fn=run_check, inputs=check_inputs, outputs=[out])

if __name__ == "__main__":
    demo.launch()

checker.py ADDED Viewed

	@@ -0,0 +1,307 @@

+"""
+checker.py — core logic for Image + Text Compliance Check
+This module is UI-agnostic (no FastAPI/Gradio). Import its functions from
+app.py (Gradio) or an API layer. CPU-only; optional tiny HF classifier via env.
+"""
+from __future__ import annotations
+from typing import List, Optional, Dict, Any, Iterable, Union
+import os
+import re
+import json
+try:
+    from PIL import Image  # type: ignore
+except Exception:
+    Image = None  # Allows import without PIL when not doing OCR
+try:
+    import pytesseract  # type: ignore
+except Exception:
+    pytesseract = None
+# -----------------------------
+# Config & Constants
+# -----------------------------
# Defaults used when the caller does not supply company details.
COMPANY_NAME_DEFAULT = "Berkshire Hathaway HomeServices Beazley, REALTORS"
COMPANY_PHONES_DEFAULT = ["7068631775", "8032337111"]
# One literal per sentence; adjacent literals are concatenated into one string.
DISCLAIMER_DEFAULT = (
    "©2025 BHH Affiliates, LLC. "
    "An independently owned and operated franchisee of BHH Affiliates, LLC. "
    "Berkshire Hathaway HomeServices and the Berkshire Hathaway HomeServices symbol "
    "are registered service marks of Columbia Insurance Company, a Berkshire Hathaway affiliate. "
    "Equal Housing Opportunity."
)

# Environment-driven settings, all read once at import time.
# Boolean switches are on only when the variable is exactly "1".
# Whether social posts must carry the disclaimer (on unless overridden).
REQUIRE_DISCLAIMER_ON_SOCIAL = os.environ.get("REQUIRE_DISCLAIMER_ON_SOCIAL", "1") == "1"
# Optional tiny Hugging Face classifier (off unless USE_TINY_ML=1).
USE_TINY_ML = os.environ.get("USE_TINY_ML", "0") == "1"
HF_REPO = os.environ.get("HF_REPO", "tlogandesigns/fairhousing-bert-tiny")
# Minimum model score for a prediction to be reported as a flag.
HF_THRESH = float(os.environ.get("HF_THRESH", "0.75"))
# Optional rule-based phrases file; used for flags when it exists.
PHRASES_PATH = os.environ.get("PHRASES_PATH", "phrases.yaml")
+# -----------------------------
+# Utilities
+# -----------------------------
# US phone number: optional "+1"/"1" prefix followed by 3-3-4 digits.
# - At most three non-digit characters may separate the groups, which covers
#   "(706) 863-1775", "706.863.1775" and "706 - 863 - 1775" without letting
#   unrelated numbers far apart in the text be stitched into one "phone".
# - The lookarounds stop a phone from being matched inside a longer digit run.
PHONE_RE = re.compile(
    r"(?<!\d)(?:\+?1\D{0,3})?([2-9]\d{2})\D{0,3}(\d{3})\D{0,3}(\d{4})(?!\d)"
)


def normalize_phone(s: str) -> str:
    """Reduce a phone number to its 10 national digits.

    Every non-digit is removed, and a leading "1" country code is dropped
    from 11-digit numbers. None yields an empty string; non-string values
    (e.g. numbers parsed from JSON) are converted to text first.
    """
    digits = re.sub(r"\D", "", "" if s is None else str(s))
    if len(digits) == 11 and digits.startswith("1"):
        digits = digits[1:]
    return digits


def count_phone_instances(text: str, target_numbers: Iterable[str]) -> int:
    """Count phone numbers in *text* that equal one of *target_numbers*.

    Args:
        text: Text to scan; None is treated as empty.
        target_numbers: Phone numbers in any formatting. A single bare
            string or int is treated as a one-element collection.

    Returns:
        The number of phone-shaped matches whose digits equal a target.
    """
    if isinstance(target_numbers, (str, int)):
        # Iterating a bare string would yield single characters.
        target_numbers = [target_numbers]
    targets = {normalize_phone(n) for n in (target_numbers or []) if n}
    targets.discard("")  # entries without any digits can never match
    if not targets:
        return 0
    return sum(
        1
        for m in PHONE_RE.finditer(text or "")
        if "".join(m.groups()) in targets
    )
def escape_name_regex(name: str) -> str:
    """Build a regex source string that matches *name* tolerantly.

    The name is split on whitespace and on the separator punctuation
    ``- . ,``. The tokens must then appear in order, separated by one or
    more of those same characters, so "Beazley, REALTORS" also matches
    "Beazley REALTORS". Returns an empty string when the name has no tokens.
    """
    tokens = [re.escape(tok) for tok in re.split(r"[\s\-.,]+", name or "") if tok]
    if not tokens:
        return r""  # no name
    # Lookarounds instead of \b: \b can never match next to a non-word
    # character, which broke names starting or ending in one (e.g. "®").
    return r"(?<!\w)" + r"[\s\-.,]+".join(tokens) + r"(?!\w)"


def count_name_instances(text: str, name: str) -> int:
    """Count case-insensitive occurrences of *name* in *text*.

    Returns 0 when the name is empty, None, or only separator characters.
    """
    pattern_src = escape_name_regex(name)
    if not pattern_src:
        # An empty pattern would match at every position in the text.
        return 0
    pattern = re.compile(pattern_src, re.IGNORECASE)
    return sum(1 for _ in pattern.finditer(text or ""))
def contains_disclaimer(text: str, disclaimer: str) -> bool:
    """Report whether *disclaimer* occurs in *text*.

    Matching ignores case and treats every run of whitespace as a single
    space. An empty or missing disclaimer is never considered present.
    """
    if not disclaimer:
        return False
    # Normalise both sides identically before the substring test.
    needle, haystack = (
        re.sub(r"\s+", " ", part or "").strip().lower()
        for part in (disclaimer, text)
    )
    return needle in haystack
+# -----------------------------
+# Fair Housing Classifier (hybrid)
+# -----------------------------
+try:
+    import yaml  # type: ignore
+except Exception:
+    yaml = None
+PHRASE_PATTERNS: List[re.Pattern] = []
+if yaml and os.path.exists(PHRASES_PATH):
+    try:
+        data = yaml.safe_load(open(PHRASES_PATH, "r", encoding="utf-8").read()) or {}
+        for rx in data.get("patterns", []):
+            # compile as case-insensitive
+            PHRASE_PATTERNS.append(re.compile(rx, re.IGNORECASE))
+    except Exception as e:
+        print("Failed loading phrases.yaml:", e)
+# Optional HF pipeline (disabled by default to keep CPU/lightweight)
# Text-classification pipeline, or None when the model is disabled or could
# not be loaded. fair_housing_flags skips the model step when this is None.
hf_pipe = None
if USE_TINY_ML:
    try:
        # Imported here so transformers is only needed when the model is enabled.
        from transformers import pipeline  # type: ignore

        hf_pipe = pipeline(task="text-classification", model=HF_REPO)
    except Exception as exc:
        hf_pipe = None
        print("HF model unavailable:", exc)
def fair_housing_flags(text: str) -> List[str]:
    """Return human-readable fair-housing flags for *text*.

    Two sources contribute, in this order:
      1. Every match of every regex in PHRASE_PATTERNS ("RuleFlag: ...").
      2. The optional classifier in hf_pipe ("MLFlag: ..."), when loaded.

    Args:
        text: Content to inspect; None is treated as empty.

    Returns:
        A list of flag strings; empty when nothing was flagged. A model
        failure is reported as an "MLFlag: inference error" entry rather
        than raised.
    """
    flags: List[str] = []
    t = text or ""

    # Rule-based first: one flag per match, with 30 characters of context
    # on either side so a reviewer can see what triggered it.
    for pat in PHRASE_PATTERNS:
        for m in pat.finditer(t):
            snippet = t[max(0, m.start() - 30) : m.end() + 30]
            flags.append(
                f"RuleFlag: pattern '{pat.pattern}' matched around: {snippet!r}"
            )

    # Optional tiny model; there is nothing to classify in blank text.
    if hf_pipe and t.strip():
        try:
            # The slice bounds characters, not tokens. truncation=True makes
            # the tokenizer cut to the model's maximum length, so dense or
            # noisy text cannot fail with a sequence-length error.
            pred = hf_pipe(t[:2000], truncation=True)
            # Expecting [{'label': 'LABEL_1'/'LABEL_0', 'score': 0.x}] or custom labels
            lbl = pred[0]["label"]
            score = float(pred[0]["score"])
            # Assume LABEL_1 = potential violation (adjust to your model labels)
            if (lbl in ("1", "LABEL_1", "violation", "POSITIVE")) and score >= HF_THRESH:
                flags.append(f"MLFlag: model={HF_REPO} label={lbl} score={score:.2f}")
        except Exception as e:
            flags.append(f"MLFlag: inference error: {e}")
    return flags
+# -----------------------------
+# Core evaluation logic
+# -----------------------------
def evaluate_section(
    text: str,
    social: bool,
    company_name: str,
    company_phones: List[str],
    agent_name: str,
    agent_phone: str,
    disclaimer: str,
    require_disclaimer_on_social: bool,
) -> Dict[str, Any]:
    """Check one piece of text for name/phone balance and the disclaimer.

    A section is compliant when the company name appears exactly as often
    as the agent name, office phones appear exactly as often as the agent
    phone, and the disclaimer is present whenever it is required (that is,
    when both *social* and *require_disclaimer_on_social* are truthy).

    Returns:
        {"compliant": bool, "Flags": [str, ...]} with one flag per failed
        check, ordered disclaimer, name, phone.
    """
    issues: List[str] = []

    # The disclaimer is only looked for when it is actually required.
    if social and require_disclaimer_on_social:
        if not contains_disclaimer(text, disclaimer):
            issues.append("Missing disclaimer on social content")

    # Name balance: company mentions must equal agent mentions.
    company_hits = count_name_instances(text, company_name)
    agent_hits = count_name_instances(text, agent_name)
    if company_hits != agent_hits:
        issues.append(f"Name imbalance: company={company_hits} vs agent={agent_hits}")

    # Phone balance: office numbers must equal agent numbers.
    agent_numbers = [agent_phone] if agent_phone else []
    office_calls = count_phone_instances(text, company_phones)
    agent_calls = count_phone_instances(text, agent_numbers)
    if office_calls != agent_calls:
        issues.append(f"Phone imbalance: office={office_calls} vs agent={agent_calls}")

    # Each failed check adds exactly one flag, so no flags means compliant.
    return {"compliant": len(issues) == 0, "Flags": issues}
+# -----------------------------
+# OCR helper (optional)
+# -----------------------------
def ocr_image(image: Union["Image.Image", bytes, None]) -> str:
    """Extract text from a PIL image or from raw image bytes.

    Returns an empty string when there is no image, when pytesseract (or
    PIL, for raw bytes) is not installed, or when decoding/OCR fails.
    """
    can_ocr = image is not None and pytesseract is not None
    if not can_ocr:
        return ""
    try:
        if isinstance(image, bytes):
            # Raw bytes must be decoded with PIL before OCR.
            if Image is None:
                return ""
            from io import BytesIO

            decoded = Image.open(BytesIO(image))
            image = decoded.convert("RGB")
        extracted = pytesseract.image_to_string(image)  # type: ignore[arg-type]
    except Exception:
        # Best-effort helper: any failure yields "no text".
        return ""
    return extracted
+# -----------------------------
+# Orchestration (UI-agnostic)
+# -----------------------------
def run_check(
    image: Optional["Image.Image"],
    ptxt: str,
    social: bool,
    agent_name: str,
    agent_phone: str,
    *,
    company_name: str = COMPANY_NAME_DEFAULT,
    company_phones: Optional[List[str]] = None,
    disclaimer: str = DISCLAIMER_DEFAULT,
    require_disclaimer_on_social: Optional[bool] = None,
) -> Dict[str, Any]:
    """Run the whole pipeline: OCR, fair-housing scan, per-section checks.

    Falls back to COMPANY_PHONES_DEFAULT when *company_phones* is empty or
    None, and to REQUIRE_DISCLAIMER_ON_SOCIAL when
    *require_disclaimer_on_social* is None.

    Returns:
        A dict with three blocks, each {"compliant": bool, "Flags": [...]}:
        - Fair_Housing: flags found in the image text and post text combined
        - img: checks on the text recognised in the image
        - Ptxt: checks on the post text
    """
    phones = company_phones or COMPANY_PHONES_DEFAULT
    needs_disclaimer = (
        REQUIRE_DISCLAIMER_ON_SOCIAL
        if require_disclaimer_on_social is None
        else require_disclaimer_on_social
    )

    image_text = ocr_image(image)
    post_text = ptxt or ""

    # The fair-housing scan runs once over everything; empty pieces are dropped.
    pieces = [image_text, post_text, f"Social={social}"]
    combined = "\n\n".join(piece for piece in pieces if piece)
    findings = fair_housing_flags(combined)

    # Both sections are evaluated with exactly the same settings.
    shared = dict(
        social=social,
        company_name=company_name,
        company_phones=phones,
        agent_name=agent_name,
        agent_phone=agent_phone,
        disclaimer=disclaimer,
        require_disclaimer_on_social=needs_disclaimer,
    )
    image_result = evaluate_section(text=image_text, **shared)
    post_result = evaluate_section(text=post_text, **shared)

    return {
        "Fair_Housing": {"compliant": not findings, "Flags": findings},
        "img": image_result,
        "Ptxt": post_result,
    }
+__all__ = [
+    "COMPANY_NAME_DEFAULT",
+    "COMPANY_PHONES_DEFAULT",
+    "DISCLAIMER_DEFAULT",
+    "REQUIRE_DISCLAIMER_ON_SOCIAL",
+    "USE_TINY_ML",
+    "HF_REPO",
+    "HF_THRESH",
+    "PHRASES_PATH",
+    "count_phone_instances",
+    "count_name_instances",
+    "contains_disclaimer",
+    "fair_housing_flags",
+    "evaluate_section",
+    "ocr_image",
+    "run_check",
+]

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ tesseract-ocr

phrases.yaml ADDED Viewed

	@@ -0,0 +1,140 @@

+# Expanded Fair Housing phrase patterns
+# Derived from the South Carolina Press Association 'Alphabetical List of Words/Phrases Connected with Advertisements for Housing'
+# Use as guidance only. Not legal advice.
+categories:
+  Familial status:
+    patterns:
+      - '(?:no|without) (?:kids|children|families)'
+      - 'adult(?:s)?[- ]only'
+      - 'adults? (?:preferred|only)'
+      - 'adult (?:building|community|living)'
+      - 'mature (?:adults?|tenants?)'
+      - 'newlyweds'
+      - 'empty nesters?'
+      - 'one (?:child|kid) only'
+      - 'one person only'
+      - '(?:sleeps|sleeping for)\s*[1-4]\b'  # implies < 5 persons
+      - 'children,? \s*no'
+      - 'no children please'
+      - 'children not allowed'
+      - 'perfect for singles only'
+      - 'his and hers'  # implies couple only
+      - 'play area provided'  # note: may be fine, but can imply family targeting
+    suggest:
+      - 'Please inquire for occupancy guidelines'
+      - 'All qualified applicants are welcome'
+      - 'Suitable for a variety of household types'
+  Religion:
+    patterns:
+      - '\bchristians? only\b'
+      - '\bcatholics? only\b'
+      - '\bmormons? only\b'
+      - '\bmuslims? only\b'
+      - '\bjews\b|\bjewish only\b'
+      - '\bhindus? only\b'
+      - '\bbuddhists? only\b'
+      - '\bsikhs? only\b'
+      - 'no (?:christians?|catholics?|mormons?|muslims?|jews|jewish|hindus?|buddhists?|sikhs?)'
+      - '(?:christians?|catholics?|mormons?|muslims?|jews|jewish|hindus?|buddhists?|sikhs?)'
+      - 'christian (?:community|home|area)'
+      - 'ideal for [a-z]+ faith'
+      - 'close to (?:church|temple|synagogue|mosque|parish)'
+      - 'parish(?:,| )name of|parish, close to|parish close to'
+    suggest:
+      - 'Close to multiple houses of worship and community centers'
+      - 'Inclusive housing policy. All qualified applicants considered'
+  Disability:
+    patterns:
+      - 'able[- ]bodied only'
+      - 'must be ambulatory'
+      - 'independently,? capable of living'
+      - 'mentally (?:handicapped|ill|retarded)'
+      - 'physically fit (?:only|tenants?)'
+      - 'no (?:wheelchairs|service animals)'  # note: service animals cannot be excluded
+      - 'handicapped|cripples?|retarded'
+    suggest:
+      - 'Please review accessibility details in the listing'
+      - 'Service animals accommodated per law'
+      - 'Accessible features listed where available'
+  Sex:
+    patterns:
+      - '(?:female|male) only (?:tenant|roommate)'
+      - 'ladies only'
+      - 'women only|men only'
+      - 'heterosexuals? only|straight\(s\) only'
+      - 'gay\(s\) only|lesbian only|homosexuals? only'
+    suggest:
+      - 'All qualified applicants are welcome'
+  Race or color:
+    patterns:
+      - '\bwhite\b (?:only|preferred)?\b'
+      - '\bblack\b (?:only|preferred)?\b'
+      - 'caucasian|oriental|colored|whites|blacks|asians|hispanics?|latinos?|native americans?'
+      - 'no (?:whites|blacks|asians|hispanics|latinos|native americans?)'
+      - 'no (?:white|black|asian|hispanic|latino|native american) applicants'
+      - 'no (?:white|black|asian|hispanic|latino|native american) tenants'
+      - 'no (?:white|black|asian|hispanic|latino|native american) families'
+      - 'race|color (?:when describing persons)?'
+      - 'integrated|interracial|mixed community'  # steering risk
+      - 'exclusive neighborhood|exclusive street'  # coded exclusion
+      - 'safe (?:neighborhood|area|home|community|block)'  # vague safety coding
+    suggest:
+      - 'Neighborhood information available from public sources'
+      - 'Proximity to parks, transit, and amenities'
+  National origin and language:
+    patterns:
+      - 'english[- ]speaking(?: only)?|english speakers only'
+      - 'no immigrants|foreigners'
+      - 'mexican[- ]american|puerto rican|chinese|polish|irish|middle[- ]eastern(?:er)?'  # when used to target
+      - 'u\.?s\.? citizen required'
+      - 'ethnic (?:neighborhood|group)'
+    suggest:
+      - 'Clear and complete application required for all'
+      - 'Multilingual applicants welcome'
+  Source of income and assistance:
+    patterns:
+      - 'no vouchers'
+      - 'no section ?8'
+      - 'ssi (?:no)|ssd (?:no)'
+      - 'public assistance (?:not accepted|no)'
+      - 'rent calculated per person'
+    suggest:
+      - 'Housing assistance programs evaluated per applicable law'
+      - 'Income verification may be required for all applicants'
+  Questionable targeting words:
+    # These are not per se violations under federal law, but commonly require human review for steering/targeting.
+    patterns:
+      - 'executive|professional|luxury'
+      - 'bachelor pad'
+      - 'country club'
+      - 'doorman building'
+      - 'quiet|quality neighborhood|quality home'
+      - 'perfect for (?:students|professionals|single)'
+      - 'students welcome'
+      - 'board|membership approval required'
+      - 'restricted|restrictions'
+      - 'traditional (?:home|style)'
+      - 'private (?:entrance|driveway)'  # may be fine; flagged for context
+      - 'gay[- ]friendly|christian[- ]friendly'  # potential steering
+    suggest:
+      - 'Describe objective features and amenities'
+      - 'Avoid suggesting a preference for a type of person'
+  Accessibility and seniors notes:
+    # These can be compliant if accurate and certified. Flag for review to add context.
+    patterns:
+      - 'senior(?:s)? (?:welcome|discount)'
+      - 'senior housing|housing for older persons'
+      - 'handicapped-accessible|wheelchair accessible'
+    suggest:
+      - 'If advertising senior housing, ensure HOPA certification is documented'
+      - 'List specific accessibility features rather than general labels'

requirments.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+fastapi==0.111.0
+uvicorn[standard]==0.30.1
+pillow==10.4.0
+pytesseract==0.3.10
+transformers==4.43.3
+torch==2.3.1
+pyyaml==6.0.2