tlogandesigns committed on
Commit
9da12e6
·
1 Parent(s): 0dfa3b8
Files changed (6) hide show
  1. .DS_Store +0 -0
  2. app.py +112 -0
  3. checker.py +307 -0
  4. packages.txt +1 -0
  5. phrases.yaml +140 -0
  6. requirements.txt +7 -0
.DS_Store ADDED
Binary file (6.15 kB). View file
 
app.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os, json, gradio as gr
2
+ from PIL import Image
3
+ import pytesseract
4
+
5
+ # --- import your logic ---
6
+ from checker import (
7
+ evaluate_section,
8
+ fair_housing_flags,
9
+ COMPANY_NAME_DEFAULT,
10
+ COMPANY_PHONES_DEFAULT,
11
+ DISCLAIMER_DEFAULT,
12
+ contains_disclaimer,
13
+ count_name_instances,
14
+ count_phone_instances,
15
+ )
16
+
17
def _parse_company_phones(raw):
    """Decode the "Company Phones" textbox into a list of phone strings.

    Accepts a JSON list of strings/numbers or a single JSON string/number.
    Anything that is not valid JSON, or that decodes to an unsupported shape
    (object, null, float, ...), falls back to COMPANY_PHONES_DEFAULT instead
    of crashing the phone counter later on.
    """
    try:
        parsed = json.loads(raw)
    except (TypeError, ValueError):
        # TypeError: raw is not str/bytes; ValueError: malformed JSON.
        return list(COMPANY_PHONES_DEFAULT)

    # bool is a subclass of int, so exclude it explicitly.
    if isinstance(parsed, (str, int)) and not isinstance(parsed, bool):
        parsed = [parsed]
    if not isinstance(parsed, list):
        return list(COMPANY_PHONES_DEFAULT)

    return [
        str(item)
        for item in parsed
        if isinstance(item, (str, int)) and not isinstance(item, bool)
    ]


def run_check(image, ptxt, social, agent_name, agent_phone,
              company_name, company_phones_json, disclaimer):
    """Gradio callback: OCR the image, evaluate both sections, return JSON text.

    Args:
        image: PIL image from the upload widget, or None when nothing was uploaded.
        ptxt: Post text typed by the user.
        social: Whether the content is a social post (enables the disclaimer check).
        agent_name: Agent name to count in each section.
        agent_phone: Agent phone number to count in each section.
        company_name: Company name to count in each section.
        company_phones_json: JSON text describing the office phone number(s).
        disclaimer: Disclaimer text expected on social content.

    Returns:
        A JSON string (indent=2) with the keys "Fair_Housing", "img" and "Ptxt",
        each holding {"compliant": bool, "Flags": [str, ...]}.
    """
    # OCR: failures are reported as a flag on the image section, not raised.
    itxt = ""
    ocr_err = None
    if image is not None:
        try:
            itxt = pytesseract.image_to_string(image)
        except Exception as e:  # UI boundary: surface any OCR failure to the user
            ocr_err = str(e)

    ptxt = ptxt or ""

    # Fair-housing flags run once over the combined content.
    content = "\n\n".join(x for x in [itxt, ptxt, f"Social={social}"] if x)
    fh_flags = fair_housing_flags(content)
    fair_housing_block = {"compliant": len(fh_flags) == 0, "Flags": fh_flags}

    company_phones = _parse_company_phones(company_phones_json)

    # Read at call time so the toggle can change without re-importing.
    require_disclaimer_on_social = os.getenv("REQUIRE_DISCLAIMER_ON_SOCIAL", "1") == "1"

    def eval_section(text):
        # Delegate to the shared implementation in checker.py so the UI and the
        # core module cannot drift apart.
        return evaluate_section(
            text=text,
            social=social,
            company_name=company_name,
            company_phones=company_phones,
            agent_name=agent_name,
            agent_phone=agent_phone,
            disclaimer=disclaimer,
            require_disclaimer_on_social=require_disclaimer_on_social,
        )

    img_block = eval_section(itxt)
    if ocr_err:
        img_block["Flags"].append(f"OCR error: {ocr_err}")

    ptxt_block = eval_section(ptxt)

    payload = {
        "Fair_Housing": fair_housing_block,
        "img": img_block,
        "Ptxt": ptxt_block,
    }
    return json.dumps(payload, indent=2)
84
+
85
# Gradio layout: content inputs, agent details, advanced overrides, result pane.
with gr.Blocks(title="Image + Text Compliance Check") as demo:
    gr.Markdown("# Image + Text Compliance Check")

    # Content under review.
    with gr.Row():
        image = gr.Image(type="pil", label="Upload image (optional)")
        ptxt = gr.Textbox(lines=8, label="Post Text (Ptxt)")

    # Post type and agent identity.
    with gr.Row():
        social = gr.Checkbox(label="Social", value=False)
        agent_name = gr.Textbox(label="Agent Name", placeholder="e.g., Jane Doe")
        agent_phone = gr.Textbox(label="Agent Phone (digits or formatted)")

    # Company-level settings, pre-filled from the checker defaults.
    with gr.Accordion("Advanced", open=False):
        company_name = gr.Textbox(label="Company Name", value=COMPANY_NAME_DEFAULT)
        company_phones_json = gr.Textbox(
            label="Company Phones (JSON list)",
            value=json.dumps(COMPANY_PHONES_DEFAULT),
        )
        disclaimer = gr.Textbox(label="Disclaimer", value=DISCLAIMER_DEFAULT)

    run_btn = gr.Button("Run Compliance Check")
    out = gr.Code(label="Result JSON", language="json")

    # Order must match the positional parameters of run_check.
    check_inputs = [
        image,
        ptxt,
        social,
        agent_name,
        agent_phone,
        company_name,
        company_phones_json,
        disclaimer,
    ]
    run_btn.click(fn=run_check, inputs=check_inputs, outputs=[out])


if __name__ == "__main__":
    demo.launch()
checker.py ADDED
@@ -0,0 +1,307 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ checker.py — core logic for Image + Text Compliance Check
3
+
4
+ This module is UI-agnostic (no FastAPI/Gradio). Import its functions from
5
+ app.py (Gradio) or an API layer. CPU-only; optional tiny HF classifier via env.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ from typing import List, Optional, Dict, Any, Iterable, Union
10
+ import os
11
+ import re
12
+ import json
13
+
14
+ try:
15
+ from PIL import Image # type: ignore
16
+ except Exception:
17
+ Image = None # Allows import without PIL when not doing OCR
18
+
19
+ try:
20
+ import pytesseract # type: ignore
21
+ except Exception:
22
+ pytesseract = None
23
+
24
+ # -----------------------------
25
+ # Config & Constants
26
+ # -----------------------------
27
def _env_flag(name: str, default: str) -> bool:
    """Return True when environment variable *name* (or *default*) is exactly "1"."""
    return os.getenv(name, default) == "1"


# Company identity used when the caller supplies no override.
COMPANY_NAME_DEFAULT = "Berkshire Hathaway HomeServices Beazley, REALTORS"
COMPANY_PHONES_DEFAULT = ["7068631775", "8032337111"]
DISCLAIMER_DEFAULT = "".join(
    (
        "©2025 BHH Affiliates, LLC. An independently owned and operated franchisee of BHH Affiliates, LLC. ",
        "Berkshire Hathaway HomeServices and the Berkshire Hathaway HomeServices symbol are registered service marks ",
        "of Columbia Insurance Company, a Berkshire Hathaway affiliate. Equal Housing Opportunity.",
    )
)

# Whether social posts must carry the disclaimer (env value "1" enables it).
REQUIRE_DISCLAIMER_ON_SOCIAL = _env_flag("REQUIRE_DISCLAIMER_ON_SOCIAL", "1")

# Optional tiny HF classifier; off unless USE_TINY_ML=1.
USE_TINY_ML = _env_flag("USE_TINY_ML", "0")
HF_REPO = os.getenv("HF_REPO", "tlogandesigns/fairhousing-bert-tiny")
HF_THRESH = float(os.getenv("HF_THRESH", "0.75"))

# Optional rule-based phrases file; used for flags when it exists.
PHRASES_PATH = os.getenv("PHRASES_PATH", "phrases.yaml")
45
+
46
+ # -----------------------------
47
+ # Utilities
48
+ # -----------------------------
49
# 10-digit phone number with an optional "+1"/"1" prefix.
# - (?<!\d) / (?!\d): never start or stop inside a longer run of digits, so a
#   neighbouring number cannot swallow part of a real phone number.
# - Separators are limited to at most three non-alphanumeric characters, which
#   covers "(706) 863-1775", "706.863.1775" and "706 - 863 - 1775" but stops
#   unrelated numbers in a sentence from being glued into one "phone number".
PHONE_RE = re.compile(
    r"(?<!\d)"
    r"(?:\+?1[^\dA-Za-z]{0,3})?"
    r"([2-9]\d{2})[^\dA-Za-z]{0,3}"
    r"(\d{3})[^\dA-Za-z]{0,3}"
    r"(\d{4})"
    r"(?!\d)"
)


def normalize_phone(s: str) -> str:
    """Reduce a phone number to its digits, dropping a leading country code "1".

    Non-string values (e.g. numbers decoded from JSON) are converted with
    str() first; None yields an empty string.
    """
    digits = re.sub(r"\D", "", "" if s is None else str(s))
    if len(digits) == 11 and digits.startswith("1"):
        digits = digits[1:]
    return digits


def count_phone_instances(text: str, target_numbers: Iterable[str]) -> int:
    """Count phone numbers in *text* that equal one of *target_numbers*.

    Args:
        text: Text to scan; None is treated as empty.
        target_numbers: Numbers to look for, in any formatting. A single
            string or int is accepted and treated as a one-element list.

    Returns:
        Number of phone-number occurrences in *text* whose 10 digits match a
        normalized target.
    """
    if isinstance(target_numbers, (str, int)):
        # A bare string would otherwise be iterated character by character.
        target_numbers = [target_numbers]

    targets = {normalize_phone(n) for n in (target_numbers or []) if n}
    targets.discard("")  # targets that contained no digits at all
    if not targets:
        return 0

    return sum(
        1
        for m in PHONE_RE.finditer(text or "")
        if "".join(m.groups()) in targets
    )
67
+
68
+
69
# Characters allowed between (and around) the words of a name.
_NAME_SEPARATORS = r"[\s\-.,]+"


def escape_name_regex(name: str) -> str:
    """Build a regex source string that matches *name* with flexible separators.

    The name is split on whitespace and on the punctuation in
    _NAME_SEPARATORS, so punctuation in the configured name is optional in the
    text ("Beazley, REALTORS" also matches "Beazley REALTORS"). Lookarounds
    are used instead of ``\\b`` so that names which originally ended in
    punctuation ("John Smith Jr.") still match.

    Returns:
        The pattern source, or "" when *name* contains no matchable token.
    """
    tokens = [
        re.escape(tok)
        for tok in re.split(_NAME_SEPARATORS, name or "")
        if tok
    ]
    if not tokens:
        return r""  # no name
    return r"(?<!\w)" + _NAME_SEPARATORS.join(tokens) + r"(?!\w)"


def count_name_instances(text: str, name: str) -> int:
    """Count case-insensitive occurrences of *name* in *text*.

    Returns 0 when *name* is empty or consists only of separators; an empty
    pattern would otherwise match at every position of *text*.
    """
    pattern_source = escape_name_regex(name)
    if not pattern_source:
        return 0
    pattern = re.compile(pattern_source, re.IGNORECASE)
    return sum(1 for _ in pattern.finditer(text or ""))
83
+
84
+
85
def contains_disclaimer(text: str, disclaimer: str) -> bool:
    """Report whether *disclaimer* occurs in *text*, ignoring case and spacing.

    Both strings have runs of whitespace collapsed to a single space, are
    stripped and lower-cased before a plain substring test. An empty
    *disclaimer* never counts as present.
    """
    if not disclaimer:
        return False

    needle, haystack = (
        re.sub(r"\s+", " ", part or "").strip().lower()
        for part in (disclaimer, text)
    )
    return needle in haystack
93
+
94
+
95
+ # -----------------------------
96
+ # Fair Housing Classifier (hybrid)
97
+ # -----------------------------
98
+ try:
99
+ import yaml # type: ignore
100
+ except Exception:
101
+ yaml = None
102
+
103
+ PHRASE_PATTERNS: List[re.Pattern] = []
104
+ if yaml and os.path.exists(PHRASES_PATH):
105
+ try:
106
+ data = yaml.safe_load(open(PHRASES_PATH, "r", encoding="utf-8").read()) or {}
107
+ for rx in data.get("patterns", []):
108
+ # compile as case-insensitive
109
+ PHRASE_PATTERNS.append(re.compile(rx, re.IGNORECASE))
110
+ except Exception as e:
111
+ print("Failed loading phrases.yaml:", e)
112
+
113
+ # Optional HF pipeline (disabled by default to keep CPU/lightweight)
114
def _load_hf_pipe():
    """Create the text-classification pipeline for HF_REPO, or None on failure."""
    try:
        from transformers import pipeline  # type: ignore

        return pipeline("text-classification", model=HF_REPO)
    except Exception as err:
        # Import or model loading failed; report it and carry on without ML.
        print("HF model unavailable:", err)
        return None


# The classifier is only built when USE_TINY_ML is set.
hf_pipe = _load_hf_pipe() if USE_TINY_ML else None
123
+
124
+
125
def fair_housing_flags(text: str) -> List[str]:
    """Collect fair-housing flags for *text*.

    Every match of every compiled phrase pattern yields one "RuleFlag" entry
    quoting up to 30 characters of context on each side. When the optional
    classifier is loaded, the first 2000 characters are also scored and an
    "MLFlag" entry is added for a violation label at or above HF_THRESH, or
    for an inference error.
    """
    body = text or ""

    def context(hit) -> str:
        return body[max(0, hit.start() - 30) : hit.end() + 30]

    found: List[str] = [
        f"RuleFlag: pattern '{rule.pattern}' matched around: {context(hit)!r}"
        for rule in PHRASE_PATTERNS
        for hit in rule.finditer(body)
    ]

    if not hf_pipe:
        return found

    try:
        # Only the top prediction of a size-limited excerpt is inspected.
        top = hf_pipe(body[:2000])[0]
        label = top["label"]
        confidence = float(top["score"])
        # LABEL_1 is assumed to mean "potential violation" (adjust per model).
        is_violation = label in ("1", "LABEL_1", "violation", "POSITIVE")
        if is_violation and confidence >= HF_THRESH:
            found.append(f"MLFlag: model={HF_REPO} label={label} score={confidence:.2f}")
    except Exception as err:
        found.append(f"MLFlag: inference error: {err}")

    return found
151
+
152
+
153
+ # -----------------------------
154
+ # Core evaluation logic
155
+ # -----------------------------
156
+
157
def evaluate_section(
    text: str,
    social: bool,
    company_name: str,
    company_phones: List[str],
    agent_name: str,
    agent_phone: str,
    disclaimer: str,
    require_disclaimer_on_social: bool,
) -> Dict[str, Any]:
    """Check one block of text for name/phone balance and the disclaimer.

    A section is compliant when the company and agent names occur equally
    often, office and agent phone numbers occur equally often, and (only for
    social content with the requirement enabled) the disclaimer is present.

    Returns:
        {"compliant": bool, "Flags": [str, ...]} with one flag per failed check.
    """
    company_mentions = count_name_instances(text, company_name)
    agent_mentions = count_name_instances(text, agent_name)

    agent_numbers = [agent_phone] if agent_phone else []
    office_calls = count_phone_instances(text, company_phones)
    agent_calls = count_phone_instances(text, agent_numbers)

    issues: List[str] = []

    # The disclaimer is only checked for social content when required.
    has_disclaimer = True
    if social and require_disclaimer_on_social:
        has_disclaimer = contains_disclaimer(text, disclaimer)
        if not has_disclaimer:
            issues.append("Missing disclaimer on social content")

    names_balanced = company_mentions == agent_mentions
    if not names_balanced:
        issues.append(
            f"Name imbalance: company={company_mentions} vs agent={agent_mentions}"
        )

    phones_balanced = office_calls == agent_calls
    if not phones_balanced:
        issues.append(
            f"Phone imbalance: office={office_calls} vs agent={agent_calls}"
        )

    return {
        "compliant": names_balanced and phones_balanced and has_disclaimer,
        "Flags": issues,
    }
202
+
203
+
204
+ # -----------------------------
205
+ # OCR helper (optional)
206
+ # -----------------------------
207
+
208
def ocr_image(image: Union["Image.Image", bytes, None]) -> str:
    """Extract text from a PIL image or raw image bytes.

    Returns "" when there is no image, when pytesseract (or, for bytes input,
    PIL) could not be imported, or when decoding/OCR raises.
    """
    if image is None or pytesseract is None:
        return ""

    try:
        source = image
        if isinstance(source, bytes):
            # Raw bytes have to be decoded with PIL before OCR.
            if Image is None:
                return ""
            from io import BytesIO

            source = Image.open(BytesIO(source)).convert("RGB")
        return pytesseract.image_to_string(source)  # type: ignore[arg-type]
    except Exception:
        # Best effort: any failure yields no text.
        return ""
222
+
223
+
224
+ # -----------------------------
225
+ # Orchestration (UI-agnostic)
226
+ # -----------------------------
227
+
228
def run_check(
    image: Optional["Image.Image"],
    ptxt: str,
    social: bool,
    agent_name: str,
    agent_phone: str,
    *,
    company_name: str = COMPANY_NAME_DEFAULT,
    company_phones: Optional[List[str]] = None,
    disclaimer: str = DISCLAIMER_DEFAULT,
    require_disclaimer_on_social: Optional[bool] = None,
) -> Dict[str, Any]:
    """Run OCR, the fair-housing scan and both section checks.

    Returns:
        A dict with three {"compliant", "Flags"} blocks:
        - "Fair_Housing": flags from the combined image and post text
        - "img": evaluation of the OCR text
        - "Ptxt": evaluation of the post text
    """
    # Fill in module defaults for options the caller left unset.
    if not company_phones:
        company_phones = COMPANY_PHONES_DEFAULT
    if require_disclaimer_on_social is None:
        require_disclaimer_on_social = REQUIRE_DISCLAIMER_ON_SOCIAL

    post_text = ptxt or ""
    itxt = ocr_image(image)

    # The fair-housing scan sees all non-empty pieces joined together.
    pieces = (itxt, post_text, f"Social={social}")
    content = "\n\n".join(piece for piece in pieces if piece)
    fh_flags = fair_housing_flags(content)

    # Both sections are evaluated with the same settings; only the text differs.
    shared = {
        "social": social,
        "company_name": company_name,
        "company_phones": company_phones,
        "agent_name": agent_name,
        "agent_phone": agent_phone,
        "disclaimer": disclaimer,
        "require_disclaimer_on_social": require_disclaimer_on_social,
    }
    img_block = evaluate_section(text=itxt, **shared)
    ptxt_block = evaluate_section(text=post_text, **shared)

    return {
        "Fair_Housing": {"compliant": len(fh_flags) == 0, "Flags": fh_flags},
        "img": img_block,
        "Ptxt": ptxt_block,
    }
288
+
289
+
290
+ __all__ = [
291
+ "COMPANY_NAME_DEFAULT",
292
+ "COMPANY_PHONES_DEFAULT",
293
+ "DISCLAIMER_DEFAULT",
294
+ "REQUIRE_DISCLAIMER_ON_SOCIAL",
295
+ "USE_TINY_ML",
296
+ "HF_REPO",
297
+ "HF_THRESH",
298
+ "PHRASES_PATH",
299
+ "count_phone_instances",
300
+ "count_name_instances",
301
+ "contains_disclaimer",
302
+ "fair_housing_flags",
303
+ "evaluate_section",
304
+ "ocr_image",
305
+ "run_check",
306
+ ]
307
+
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ tesseract-ocr
phrases.yaml ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Expanded Fair Housing phrase patterns
2
+ # Derived from the South Carolina Press Association 'Alphabetical List of Words/Phrases Connected with Advertisements for Housing'
3
+ # Use as guidance only. Not legal advice.
4
+
5
+ categories:
6
+ Familial status:
7
+ patterns:
8
+ - '(?:no|without) (?:kids|children|families)'
9
+ - 'adult(?:s)?[- ]only'
10
+ - 'adults? (?:preferred|only)'
11
+ - 'adult (?:building|community|living)'
12
+ - 'mature (?:adults?|tenants?)'
13
+ - 'newlyweds'
14
+ - 'empty nesters?'
15
+ - 'one (?:child|kid) only'
16
+ - 'one person only'
17
+ - '(?:sleeps|sleeping for)\s*[1-4]\b' # implies < 5 persons
18
+ - 'children,? \s*no'
19
+ - 'no children please'
20
+ - 'children not allowed'
21
+ - 'perfect for singles only'
22
+ - 'his and hers' # implies couple only
23
+ - 'play area provided' # note: may be fine, but can imply family targeting
24
+ suggest:
25
+ - 'Please inquire for occupancy guidelines'
26
+ - 'All qualified applicants are welcome'
27
+ - 'Suitable for a variety of household types'
28
+
29
+ Religion:
30
+ patterns:
31
+ - '\bchristians? only\b'
32
+ - '\bcatholics? only\b'
33
+ - '\bmormons? only\b'
34
+ - '\bmuslims? only\b'
35
+ - '\bjews\b|\bjewish only\b'
36
+ - '\bhindus? only\b'
37
+ - '\bbuddhists? only\b'
38
+ - '\bsikhs? only\b'
39
+ - 'no (?:christians?|catholics?|mormons?|muslims?|jews|jewish|hindus?|buddhists?|sikhs?)'
40
+ - '(?:christians?|catholics?|mormons?|muslims?|jews|jewish|hindus?|buddhists?|sikhs?)'
41
+ - 'christian (?:community|home|area)'
42
+ - 'ideal for [a-z]+ faith'
43
+ - 'close to (?:church|temple|synagogue|mosque|parish)'
44
+ - 'parish(?:,| )name of|parish, close to|parish close to'
45
+ suggest:
46
+ - 'Close to multiple houses of worship and community centers'
47
+ - 'Inclusive housing policy. All qualified applicants considered'
48
+
49
+ Disability:
50
+ patterns:
51
+ - 'able[- ]bodied only'
52
+ - 'must be ambulatory'
53
+ - 'independently,? capable of living'
54
+ - 'mentally (?:handicapped|ill|retarded)'
55
+ - 'physically fit (?:only|tenants?)'
56
+ - 'no (?:wheelchairs|service animals)' # note: service animals cannot be excluded
57
+ - 'handicapped|cripples?|retarded'
58
+ suggest:
59
+ - 'Please review accessibility details in the listing'
60
+ - 'Service animals accommodated per law'
61
+ - 'Accessible features listed where available'
62
+
63
+ Sex:
64
+ patterns:
65
+ - '(?:female|male) only (?:tenant|roommate)'
66
+ - 'ladies only'
67
+ - 'women only|men only'
68
+ - 'heterosexuals? only|straights? only'
69
+ - 'gays? only|lesbian only|homosexuals? only'
70
+ suggest:
71
+ - 'All qualified applicants are welcome'
72
+
73
+ Race or color:
74
+ patterns:
75
+ - '\bwhite\b (?:only|preferred)?\b'
76
+ - '\bblack\b (?:only|preferred)?\b'
77
+ - 'caucasian|oriental|colored|whites|blacks|asians|hispanics?|latinos?|native americans?'
78
+ - 'no (?:whites|blacks|asians|hispanics|latinos|native americans?)'
79
+ - 'no (?:white|black|asian|hispanic|latino|native american) applicants'
80
+ - 'no (?:white|black|asian|hispanic|latino|native american) tenants'
81
+ - 'no (?:white|black|asian|hispanic|latino|native american) families'
82
+ - '\brace\b|\bcolor\b' # flag only when describing persons; needs human review
83
+ - 'integrated|interracial|mixed community' # steering risk
84
+ - 'exclusive neighborhood|exclusive street' # coded exclusion
85
+ - 'safe (?:neighborhood|area|home|community|block)' # vague safety coding
86
+ suggest:
87
+ - 'Neighborhood information available from public sources'
88
+ - 'Proximity to parks, transit, and amenities'
89
+
90
+ National origin and language:
91
+ patterns:
92
+ - 'english[- ]speaking(?: only)?|english speakers only'
93
+ - 'no immigrants|foreigners'
94
+ - 'mexican[- ]american|puerto rican|chinese|polish|irish|middle[- ]eastern(?:er)?' # when used to target
95
+ - 'u\.?s\.? citizen required'
96
+ - 'ethnic (?:neighborhood|group)'
97
+ suggest:
98
+ - 'Clear and complete application required for all'
99
+ - 'Multilingual applicants welcome'
100
+
101
+ Source of income and assistance:
102
+ patterns:
103
+ - 'no vouchers'
104
+ - 'no section ?8'
105
+ - 'ssi (?:no)|ssd (?:no)'
106
+ - 'public assistance (?:not accepted|no)'
107
+ - 'rent calculated per person'
108
+ suggest:
109
+ - 'Housing assistance programs evaluated per applicable law'
110
+ - 'Income verification may be required for all applicants'
111
+
112
+ Questionable targeting words:
113
+ # These are not per se violations under federal law, but commonly require human review for steering/targeting.
114
+ patterns:
115
+ - 'executive|professional|luxury'
116
+ - 'bachelor pad'
117
+ - 'country club'
118
+ - 'doorman building'
119
+ - 'quiet|quality neighborhood|quality home'
120
+ - 'perfect for (?:students|professionals|single)'
121
+ - 'students welcome'
122
+ - '(?:board|membership) approval required'
123
+ - 'restricted|restrictions'
124
+ - 'traditional (?:home|style)'
125
+ - 'private (?:entrance|driveway)' # may be fine; flagged for context
126
+ - 'gay[- ]friendly|christian[- ]friendly' # potential steering
127
+ suggest:
128
+ - 'Describe objective features and amenities'
129
+ - 'Avoid suggesting a preference for a type of person'
130
+
131
+ Accessibility and seniors notes:
132
+ # These can be compliant if accurate and certified. Flag for review to add context.
133
+ patterns:
134
+ - 'senior(?:s)? (?:welcome|discount)'
135
+ - 'senior housing|housing for older persons'
136
+ - 'handicapped-accessible|wheelchair accessible'
137
+ suggest:
138
+ - 'If advertising senior housing, ensure HOPA certification is documented'
139
+ - 'List specific accessibility features rather than general labels'
140
+
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi==0.111.0
2
+ uvicorn[standard]==0.30.1
3
+ pillow==10.4.0
4
+ pytesseract==0.3.10
5
+ transformers==4.43.3
6
+ torch==2.3.1
7
+ pyyaml==6.0.2