|
"""
ContentGuardian - Content Audit Agent

China · Simplified Chinese · Hanyu Xinxie Style · Text-based Version

Optimized for Hugging Face Spaces
"""
|
import gradio as gr |
|
import datetime |
|
|
|
# Exaggerated / absolute advertising terms that the audit flags.
# Matching is case-insensitive substring search (see _find_occurrences).
_INAPPROPRIATE_WORDS = (
    "绝对", "完全", "100%", "永远", "从不", "必须", "一定",
    "第一", "最好", "最佳", "最优", "最强", "最大", "最小",
    "极度", "超级", "顶级", "王牌", "冠军", "霸主",
    "立即", "马上", "瞬间", "秒杀", "急速", "飞速",
    "神奇", "奇迹", "万能", "无敌", "完美", "终极",
    "absolutely", "completely", "perfect", "ultimate", "best", "first",
    "immediately", "instantly", "magical", "miracle", "supreme",
)

# Handling advice shown in the report, keyed by severity label.
_SEVERITY_ADVICE = {
    "高": "需要重点关注和修改",
    "中": "建议适当调整",
    "低": "可以正常使用",
}

_BOX_TOP = "╔" + "═" * 62 + "╗"
_BOX_BOTTOM = "╚" + "═" * 62 + "╝"
_SECTION_RULE = "─" * 64


def _find_occurrences(haystack_lower, needle_lower):
    """Return every start index of *needle_lower* in *haystack_lower*.

    Overlapping matches are included because the search resumes one
    character after each hit rather than after the end of the match.
    """
    positions = []
    pos = haystack_lower.find(needle_lower)
    while pos != -1:
        positions.append(pos)
        pos = haystack_lower.find(needle_lower, pos + 1)
    return positions


def _collect_hits(text_lower, terms, label):
    """Build one ``{label, "count", "positions"}`` dict per term found."""
    hits = []
    for term in terms:
        positions = _find_occurrences(text_lower, term.lower())
        if positions:
            hits.append({
                label: term,
                "count": len(positions),
                "positions": positions,
            })
    return hits


def _severity_label(issue_count):
    """Map the number of distinct flagged words to a severity label."""
    if issue_count < 3:
        return "低"
    if issue_count < 6:
        return "中"
    return "高"


def comprehensive_text_audit(text, keywords=""):
    """Audit *text* for exaggerated advertising wording and build a report.

    Args:
        text: The content to audit. ``None``, empty and whitespace-only
            input all yield a short error message instead of a report.
        keywords: Optional comma-separated keywords to locate in *text*.
            Surrounding whitespace is stripped, empty entries are ignored
            and repeated keywords are reported only once.

    Returns:
        A multi-line structured text report listing each flagged word and
        each keyword with its occurrence count and positions, followed by
        a severity rating and compliance advice.

    Note:
        Matching is case-insensitive and positions are 0-based indexes
        into the lowercased text. For the rare characters whose lowercase
        form has a different length, these can drift from the indexes in
        the original text.
    """
    if not text or not text.strip():
        return "❌ Please enter text content"

    # dict.fromkeys de-duplicates while keeping the user's ordering.
    keyword_list = list(dict.fromkeys(
        part.strip() for part in (keywords or "").split(",") if part.strip()
    ))

    # Lowercase once; every search below runs against this copy.
    text_lower = text.lower()
    found_inappropriate = _collect_hits(text_lower, _INAPPROPRIATE_WORDS, "word")
    found_keywords = _collect_hits(text_lower, keyword_list, "keyword")

    # Severity is driven by the number of distinct flagged words,
    # not by the total number of occurrences.
    total_issues = len(found_inappropriate)
    severity = _severity_label(total_issues)
    audited_at = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    lines = [
        "",
        _BOX_TOP,
        "║" + " " * 23 + "📋 内容审核报告" + " " * 24 + "║",
        "║" + " " * 18 + "中国·简体中文·汉语新解风格" + " " * 18 + "║",
        _BOX_BOTTOM,
        "",
        "📊 审核概览",
        f"├─ 总问题数：{total_issues} 处",
        f"├─ 严重程度：{severity}",
        f"├─ 文本长度：{len(text)} 字符",
        f"└─ 审核时间：{audited_at}",
        "",
        _SECTION_RULE,
        "",
        f"🚫 不当广告用语检测 ({len(found_inappropriate)} 处)",
    ]

    if found_inappropriate:
        for index, hit in enumerate(found_inappropriate, 1):
            lines += [
                "",
                f"{index}. 词汇：\"{hit['word']}\"",
                f"   ├─ 出现次数：{hit['count']} 次",
                f"   └─ 位置：{', '.join(map(str, hit['positions']))}",
            ]
    else:
        lines += ["", "   ✅ 未发现不当广告用语"]

    lines += [
        "",
        _SECTION_RULE,
        "",
        f"🔍 关键词标记 ({len(found_keywords)} 处)",
    ]

    if found_keywords:
        for index, hit in enumerate(found_keywords, 1):
            lines += [
                "",
                f"{index}. 关键词：\"{hit['keyword']}\"",
                f"   ├─ 出现次数：{hit['count']} 次",
                f"   └─ 位置：{', '.join(map(str, hit['positions']))}",
            ]
    else:
        lines += ["", "   ℹ️ 未指定关键词或关键词未出现"]

    lines += [
        "",
        _SECTION_RULE,
        "",
        "📝 详细分析",
        "├─ 合规建议：",
    ]

    if found_inappropriate:
        lines += [
            f"│  • 建议修改或删除 {len(found_inappropriate)} 个不当用词",
            "│  • 使用相对性表述替代绝对化用词",
            "│  • 避免夸大宣传和误导性表述",
        ]
    else:
        lines.append("│  • 内容表述规范，符合广告法要求")

    lines += [
        f"├─ 风险等级：{severity}",
        f"└─ 处理建议：{_SEVERITY_ADVICE[severity]}",
        "",
        _SECTION_RULE,
        "",
        "🎨 设计理念",
        "本审核系统融合中华文化与现代设计，体现内敛典雅之美。",
        "采用汉语新解风格，注重文字的准确性和表达的规范性。",
        "",
        _BOX_TOP,
        "║" + " " * 6
        + "审核完成 - 感谢使用 ContentGuardian 内容审核智能体"
        + " " * 6 + "║",
        _BOX_BOTTOM,
        "",
    ]

    return "\n".join(lines)
|
|
|
|
|
# Gradio UI: two text inputs (content to audit, optional comma-separated
# keywords) wired to comprehensive_text_audit, with the report shown in a
# read-only textbox.
demo = gr.Interface(
    fn=comprehensive_text_audit,
    inputs=[
        gr.Textbox(
            label="Text to Audit",
            placeholder="Please enter the text content to be audited...",
            lines=8,
        ),
        gr.Textbox(
            label="Keywords (Optional)",
            placeholder="Please enter keywords to mark, separated by commas",
            lines=2,
        ),
    ],
    outputs=gr.Textbox(
        label="Audit Report",
        lines=25,
        max_lines=30,
    ),
    title="🛡️ ContentGuardian - Content Audit Agent",
    description="""
**China · Simplified Chinese · Hanyu Xinxie Style · Text-based Version**

This system uses standardized structured text output, integrating Chinese culture with modern design concepts.
Detects inappropriate advertising language, marks specified keywords, and generates detailed audit reports.

**Key Features:**
- 🚫 Inappropriate Advertising Language Detection (32+ common violation words)
- 🔍 Precise Keyword Marking
- 📊 Structured Audit Reports
- 🎨 Hanyu Xinxie Aesthetic Style
""",
    # Each example row is [text to audit, comma-separated keywords].
    examples=[
        [
            "This product is absolutely effective, completely side-effect free, the first brand! Buy immediately, instant results!",
            "product,effect",
        ],
        [
            "Our product quality is very good, trustworthy, welcome to purchase.",
            "product,quality",
        ],
        [
            "Buy now, instant effect, 100% effective, absolutely satisfying!",
            "buy,effect",
        ],
        [
            "这款产品效果绝对好，完全无副作用，第一品牌！立即购买，马上见效！",
            "产品,效果",
        ],
    ],
    theme=gr.themes.Soft(),
    flagging_mode="never",
)


if __name__ == "__main__":
    # Start the web server only when run as a script, not on import.
    demo.launch()
|
|