Spaces:

Agents-MCP-Hackathon
/

ad-guardian

Running

App Files Files Community

ad-guardian / app.py

Nanny7

Add ContentGuardian - Content Audit Agent with Hanyu Xinxie Style

dac7f95 25 days ago

raw

history blame

8.05 kB

	"""
	ContentGuardian - Content Audit Agent
	China · Simplified Chinese · Hanyu Xinxie Style · Text-based Version
	Optimized for Hugging Face Spaces
	"""
	import datetime
	import re

	import gradio as gr

	# Absolute / superlative advertising terms flagged by the audit (Chinese and English).
	_INAPPROPRIATE_WORDS = (
	    "绝对", "完全", "100%", "永远", "从不", "必须", "一定",
	    "第一", "最好", "最佳", "最优", "最强", "最大", "最小",
	    "极度", "超级", "顶级", "王牌", "冠军", "霸主",
	    "立即", "马上", "瞬间", "秒杀", "急速", "飞速",
	    "神奇", "奇迹", "万能", "无敌", "完美", "终极",
	    "absolutely", "completely", "perfect", "ultimate", "best", "first",
	    "immediately", "instantly", "magical", "miracle", "supreme",
	)


	def _find_positions(text, term):
	    """Return the start index of every occurrence of *term* in *text*.

	    Matching ignores case and runs against the original text, so the
	    indices stay valid even when lowercasing would change the text length
	    (for example "İ" lowercases to two code points). The pattern is a
	    zero-width lookahead, so overlapping occurrences are all reported.
	    """
	    if not term:
	        return []
	    pattern = re.compile("(?=" + re.escape(term) + ")", re.IGNORECASE)
	    return [match.start() for match in pattern.finditer(text)]


	def _scan_terms(text, terms, label_key):
	    """Build one hit record per term that occurs in *text*, in term order."""
	    hits = []
	    for term in terms:
	        positions = _find_positions(text, term)
	        if positions:
	            hits.append({
	                label_key: term,
	                "count": len(positions),
	                "positions": positions,
	            })
	    return hits


	def _format_hits(hits, label_key, label_text):
	    """Render hit records as numbered report entries titled *label_text*."""
	    entries = []
	    for index, hit in enumerate(hits, 1):
	        joined_positions = ", ".join(map(str, hit["positions"]))
	        entries.append(f"""
	{index}. {label_text}："{hit[label_key]}"
	└─ 出现次数：{hit['count']} 次
	└─ 位置：{joined_positions}
	""")
	    return "".join(entries)


	def comprehensive_text_audit(text, keywords=""):
	    """Audit advertising copy and return a structured plain-text report.

	    Args:
	        text: The content to audit. ``None``, empty or whitespace-only
	            input yields a short error message instead of a report.
	        keywords: Optional keywords to mark, separated by ASCII commas
	            (",") or full-width commas ("，"). Blank entries are ignored.

	    Returns:
	        The report as a single string: an overview (issue count, severity,
	        text length, timestamp), the flagged advertising words, the marked
	        keywords, and compliance advice. Positions are 0-based character
	        indices into ``text``.
	    """
	    if not text or not text.strip():
	        return "❌ Please enter text content"

	    # Accept the full-width comma as well, since Chinese input methods
	    # produce it by default.
	    keyword_list = [
	        part.strip() for part in re.split("[,，]", keywords or "") if part.strip()
	    ]

	    found_inappropriate = _scan_terms(text, _INAPPROPRIATE_WORDS, "word")
	    found_keywords = _scan_terms(text, keyword_list, "keyword")

	    # Severity is driven by the number of distinct flagged words,
	    # not by the total number of occurrences.
	    total_issues = len(found_inappropriate)
	    if total_issues < 3:
	        severity, handling = "低", "可以正常使用"
	    elif total_issues < 6:
	        severity, handling = "中", "建议适当调整"
	    else:
	        severity, handling = "高", "需要重点关注和修改"
	    audit_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

	    # Collect the report in pieces and join once at the end.
	    sections = [f"""
	╔══════════════════════════════════════════════════════════════╗
	║ 📋 内容审核报告 ║
	║ 中国·简体中文·汉语新解风格 ║
	╚══════════════════════════════════════════════════════════════╝

	📊 审核概览
	├─ 总问题数：{total_issues} 处
	├─ 严重程度：{severity}
	├─ 文本长度：{len(text)} 字符
	└─ 审核时间：{audit_time}

	━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

	🚫 不当广告用语检测 ({len(found_inappropriate)} 处)
	"""]

	    if found_inappropriate:
	        sections.append(_format_hits(found_inappropriate, "word", "词汇"))
	    else:
	        sections.append("\n ✅ 未发现不当广告用语\n")

	    sections.append(f"""
	━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

	🔍 关键词标记 ({len(found_keywords)} 处)
	""")

	    if found_keywords:
	        sections.append(_format_hits(found_keywords, "keyword", "关键词"))
	    else:
	        sections.append("\n ℹ️ 未指定关键词或关键词未出现\n")

	    sections.append("""
	━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

	📋 详细分析
	├─ 合规建议：
	""")

	    if found_inappropriate:
	        sections.append(f"""│ • 建议修改或删除 {len(found_inappropriate)} 个不当用词
	│ • 使用相对性表述替代绝对化用词
	│ • 避免夸大宣传和误导性表述
	""")
	    else:
	        sections.append("│ • 内容表述规范，符合广告法要求\n")

	    sections.append(f"""├─ 风险等级：{severity}
	└─ 处理建议：{handling}

	━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

	🎨 设计理念
	本审核系统融合中华文化与现代设计，体现内敛典雅之美。
	采用汉语新解风格，注重文字的准确性和表达的规范性。

	╔══════════════════════════════════════════════════════════════╗
	║ 审核完成 - 感谢使用 ContentGuardian 内容审核智能体 ║
	╚══════════════════════════════════════════════════════════════╝
	""")

	    return "".join(sections)

	# Gradio UI for Hugging Face Spaces: two text inputs feed
	# comprehensive_text_audit and the returned report is shown in one textbox.
	_audit_text_box = gr.Textbox(
	    label="Text to Audit",
	    placeholder="Please enter the text content to be audited...",
	    lines=8,
	)
	_keyword_box = gr.Textbox(
	    label="Keywords (Optional)",
	    placeholder="Please enter keywords to mark, separated by commas",
	    lines=2,
	)
	_report_box = gr.Textbox(
	    label="Audit Report",
	    lines=25,
	    max_lines=30,
	)

	# Text passed to the interface as its description.
	_DESCRIPTION = """
	China · Simplified Chinese · Hanyu Xinxie Style · Text-based Version

	This system uses standardized structured text output, integrating Chinese culture with modern design concepts.
	Detects inappropriate advertising language, marks specified keywords, and generates detailed audit reports.

	Key Features:
	- 🚫 Inappropriate Advertising Language Detection (32+ common violation words)
	- 🔍 Precise Keyword Marking
	- 📊 Structured Audit Reports
	- 🎨 Hanyu Xinxie Aesthetic Style
	"""

	# Each example row is [text to audit, comma-separated keywords],
	# in the same order as the two inputs.
	_EXAMPLES = [
	    ["This product is absolutely effective, completely side-effect free, the first brand! Buy immediately, instant results!", "product,effect"],
	    ["Our product quality is very good, trustworthy, welcome to purchase.", "product,quality"],
	    ["Buy now, instant effect, 100% effective, absolutely satisfying!", "buy,effect"],
	    ["这款产品效果绝对好，完全无副作用，第一品牌！立即购买，马上见效！", "产品,效果"],
	]

	demo = gr.Interface(
	    fn=comprehensive_text_audit,
	    inputs=[_audit_text_box, _keyword_box],
	    outputs=_report_box,
	    title="🛡️ ContentGuardian - Content Audit Agent",
	    description=_DESCRIPTION,
	    examples=_EXAMPLES,
	    theme=gr.themes.Soft(),
	    flagging_mode="never",
	)

	# Start the app only when this file is run as a script.
	if __name__ == "__main__":
	    demo.launch()