# ad-guardian / app.py
# Nanny7's picture
# Add ContentGuardian - Content Audit Agent with Hanyu Xinxie Style
# dac7f95
# raw
# history blame
# 8.05 kB
"""
ContentGuardian - Content Audit Agent
China · Simplified Chinese · Hanyu Xinxie Style · Text-based Version
Optimized for Hugging Face Spaces
"""
import gradio as gr
import datetime
# Absolute / superlative advertising terms that the audit flags.
# Matching is case-insensitive substring search (see _find_positions).
_INAPPROPRIATE_WORDS = (
    "绝对", "完全", "100%", "永远", "从不", "必须", "一定",
    "第一", "最好", "最佳", "最优", "最强", "最大", "最小",
    "极度", "超级", "顶级", "王牌", "冠军", "霸主",
    "立即", "马上", "瞬间", "秒杀", "急速", "飞速",
    "神奇", "奇迹", "万能", "无敌", "完美", "终极",
    "absolutely", "completely", "perfect", "ultimate", "best", "first",
    "immediately", "instantly", "magical", "miracle", "supreme",
)

# Horizontal rules used by the text report.
_RULE_WIDTH = 62
_DOUBLE_RULE = "═" * _RULE_WIDTH
_HEAVY_RULE = "━" * _RULE_WIDTH

# Handling advice shown for each severity level.
_ADVICE_BY_SEVERITY = {
    "高": "需要重点关注和修改",
    "中": "建议适当调整",
    "低": "可以正常使用",
}


def _find_positions(haystack_lower, needle):
    """Return every start offset of *needle* in the already lower-cased text.

    Overlapping occurrences are all reported, because the search resumes one
    character after each hit.
    """
    needle_lower = needle.lower()
    if not needle_lower:
        return []
    positions = []
    pos = haystack_lower.find(needle_lower)
    while pos != -1:
        positions.append(pos)
        pos = haystack_lower.find(needle_lower, pos + 1)
    return positions


def _scan_terms(haystack_lower, terms, key):
    """Build one ``{key, "count", "positions"}`` record per term that occurs."""
    matches = []
    for term in terms:
        positions = _find_positions(haystack_lower, term)
        if positions:
            matches.append({
                key: term,
                "count": len(positions),
                "positions": positions,
            })
    return matches


def _parse_keywords(keywords):
    """Split the keyword field on ',' or ',' and drop blanks and repeats."""
    if not keywords:
        return []
    parsed = []
    seen = set()
    # Accept the fullwidth comma too: it is what a Chinese IME produces.
    for raw in keywords.replace(",", ",").split(","):
        keyword = raw.strip()
        folded = keyword.lower()
        if keyword and folded not in seen:
            seen.add(folded)
            parsed.append(keyword)
    return parsed


def _format_matches(label, key, matches):
    """Render the numbered list of match records for one report section."""
    chunks = []
    for index, item in enumerate(matches, 1):
        where = ", ".join(map(str, item["positions"]))
        chunks.append(
            f'\n{index}. {label}:"{item[key]}"\n'
            f"└─ 出现次数:{item['count']} 次\n"
            f"└─ 位置:{where}\n"
        )
    return "".join(chunks)


def comprehensive_text_audit(text, keywords=""):
    """Audit *text* for inappropriate advertising terms and mark keywords.

    Args:
        text: The content to audit. ``None``, empty or whitespace-only input
            yields a short prompt message instead of a report.
        keywords: Optional keywords to locate, separated by ``,`` or ``,``.
            Blank entries and case-insensitive repeats are ignored.

    Returns:
        A structured plain-text report. Matching is case-insensitive; the
        reported positions are 0-based character offsets into the lower-cased
        text. Severity depends on the number of *distinct* flagged terms:
        fewer than 3 is low, fewer than 6 is medium, otherwise high.
    """
    if not text or not text.strip():
        return "❌ Please enter text content"

    # Lower-case once; every search below runs against this copy.
    text_lower = text.lower()

    # 1. Inappropriate advertising language detection
    found_inappropriate = _scan_terms(text_lower, _INAPPROPRIATE_WORDS, "word")

    # 2. Keyword marking
    found_keywords = _scan_terms(
        text_lower, _parse_keywords(keywords), "keyword"
    )

    # 3. Generate structured report
    total_issues = len(found_inappropriate)
    if total_issues < 3:
        severity = "低"
    elif total_issues < 6:
        severity = "中"
    else:
        severity = "高"
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    parts = [
        f"\n╔{_DOUBLE_RULE}╗\n"
        "║ 📋 内容审核报告 ║\n"
        "║ 中国·简体中文·汉语新解风格 ║\n"
        f"╚{_DOUBLE_RULE}╝\n"
        "📊 审核概览\n"
        f"├─ 总问题数:{total_issues} 处\n"
        f"├─ 严重程度:{severity}\n"
        f"├─ 文本长度:{len(text)} 字符\n"
        f"└─ 审核时间:{timestamp}\n"
        f"{_HEAVY_RULE}\n"
        f"🚫 不当广告用语检测 ({total_issues} 处)\n"
    ]

    if found_inappropriate:
        parts.append(_format_matches("词汇", "word", found_inappropriate))
    else:
        parts.append("\n ✅ 未发现不当广告用语\n")

    parts.append(
        f"\n{_HEAVY_RULE}\n"
        f"🔍 关键词标记 ({len(found_keywords)} 处)\n"
    )

    if found_keywords:
        parts.append(_format_matches("关键词", "keyword", found_keywords))
    else:
        parts.append("\n ℹ️ 未指定关键词或关键词未出现\n")

    parts.append(
        f"\n{_HEAVY_RULE}\n"
        "📋 详细分析\n"
        "├─ 合规建议:\n"
    )

    if found_inappropriate:
        parts.append(
            f"│ • 建议修改或删除 {total_issues} 个不当用词\n"
            "│ • 使用相对性表述替代绝对化用词\n"
            "│ • 避免夸大宣传和误导性表述\n"
        )
    else:
        parts.append("│ • 内容表述规范,符合广告法要求\n")

    parts.append(
        f"├─ 风险等级:{severity}\n"
        f"└─ 处理建议:{_ADVICE_BY_SEVERITY[severity]}\n"
        f"{_HEAVY_RULE}\n"
        "🎨 设计理念\n"
        "本审核系统融合中华文化与现代设计,体现内敛典雅之美。\n"
        "采用汉语新解风格,注重文字的准确性和表达的规范性。\n"
        f"╔{_DOUBLE_RULE}╗\n"
        "║ 审核完成 - 感谢使用 ContentGuardian 内容审核智能体 ║\n"
        f"╚{_DOUBLE_RULE}╝\n"
    )

    return "".join(parts)
# Build the Gradio interface (tuned for Hugging Face Spaces): two text inputs
# (content to audit, optional keywords) feed comprehensive_text_audit and the
# resulting report is shown in a read-only text box.
demo = gr.Interface(
    fn=comprehensive_text_audit,
    inputs=[
        gr.Textbox(
            label="Text to Audit",
            placeholder="Please enter the text content to be audited...",
            lines=8,
        ),
        gr.Textbox(
            label="Keywords (Optional)",
            placeholder="Please enter keywords to mark, separated by commas",
            lines=2,
        ),
    ],
    outputs=gr.Textbox(
        label="Audit Report",
        lines=25,
        max_lines=30,
    ),
    title="🛡️ ContentGuardian - Content Audit Agent",
    # The description is rendered as Markdown, so its lines stay unindented.
    description="""
**China · Simplified Chinese · Hanyu Xinxie Style · Text-based Version**
This system uses standardized structured text output, integrating Chinese culture with modern design concepts.
Detects inappropriate advertising language, marks specified keywords, and generates detailed audit reports.
**Key Features:**
- 🚫 Inappropriate Advertising Language Detection (32+ common violation words)
- 🔍 Precise Keyword Marking
- 📊 Structured Audit Reports
- 🎨 Hanyu Xinxie Aesthetic Style
""",
    # Each example is [text to audit, comma-separated keywords].
    examples=[
        ["This product is absolutely effective, completely side-effect free, the first brand! Buy immediately, instant results!", "product,effect"],
        ["Our product quality is very good, trustworthy, welcome to purchase.", "product,quality"],
        ["Buy now, instant effect, 100% effective, absolutely satisfying!", "buy,effect"],
        ["这款产品效果绝对好,完全无副作用,第一品牌!立即购买,马上见效!", "产品,效果"],
    ],
    theme=gr.themes.Soft(),
    flagging_mode="never",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()