""" ContentGuardian - Content Audit Agent China · Simplified Chinese · Hanyu Xinxie Style · Text-based Version Optimized for Hugging Face Spaces """ import gradio as gr import datetime def comprehensive_text_audit(text, keywords=""): """ Comprehensive text audit - structured text output """ if not text.strip(): return "❌ Please enter text content" # Parse keywords keyword_list = [k.strip() for k in keywords.split(",") if k.strip()] if keywords else [] # 1. Inappropriate advertising language detection inappropriate_words = [ "绝对", "完全", "100%", "永远", "从不", "必须", "一定", "第一", "最好", "最佳", "最优", "最强", "最大", "最小", "极度", "超级", "顶级", "王牌", "冠军", "霸主", "立即", "马上", "瞬间", "秒杀", "急速", "飞速", "神奇", "奇迹", "万能", "无敌", "完美", "终极", "absolutely", "completely", "perfect", "ultimate", "best", "first", "immediately", "instantly", "magical", "miracle", "supreme" ] found_inappropriate = [] for word in inappropriate_words: if word.lower() in text.lower(): positions = [] start = 0 text_lower = text.lower() word_lower = word.lower() while True: pos = text_lower.find(word_lower, start) if pos == -1: break positions.append(pos) start = pos + 1 if positions: found_inappropriate.append({ "word": word, "count": len(positions), "positions": positions }) # 2. Keyword marking found_keywords = [] for keyword in keyword_list: if keyword.lower() in text.lower(): positions = [] start = 0 text_lower = text.lower() keyword_lower = keyword.lower() while True: pos = text_lower.find(keyword_lower, start) if pos == -1: break positions.append(pos) start = pos + 1 if positions: found_keywords.append({ "keyword": keyword, "count": len(positions), "positions": positions }) # 3. Generate structured report total_issues = len(found_inappropriate) severity = "低" if total_issues < 3 else "中" if total_issues < 6 else "高" # Build standardized text report report = f""" ╔══════════════════════════════════════════════════════════════╗ ║ 📋 内容审核报告 ║ ║ 中国·简体中文·汉语新解风格 ║ ╚══════════════════════════════════════════════════════════════╝ 📊 审核概览 ├─ 总问题数:{total_issues} 处 ├─ 严重程度:{severity} ├─ 文本长度:{len(text)} 字符 └─ 审核时间:{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 🚫 不当广告用语检测 ({len(found_inappropriate)} 处) """ if found_inappropriate: for i, item in enumerate(found_inappropriate, 1): report += f""" {i}. 词汇:"{item['word']}" └─ 出现次数:{item['count']} 次 └─ 位置:{', '.join(map(str, item['positions']))} """ else: report += "\n ✅ 未发现不当广告用语\n" report += f""" ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 🔍 关键词标记 ({len(found_keywords)} 处) """ if found_keywords: for i, item in enumerate(found_keywords, 1): report += f""" {i}. 关键词:"{item['keyword']}" └─ 出现次数:{item['count']} 次 └─ 位置:{', '.join(map(str, item['positions']))} """ else: report += "\n ℹ️ 未指定关键词或关键词未出现\n" report += f""" ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 📋 详细分析 ├─ 合规建议: """ if found_inappropriate: report += f"""│ • 建议修改或删除 {len(found_inappropriate)} 个不当用词 │ • 使用相对性表述替代绝对化用词 │ • 避免夸大宣传和误导性表述 """ else: report += "│ • 内容表述规范,符合广告法要求\n" report += f"""├─ 风险等级:{severity} └─ 处理建议:{"需要重点关注和修改" if severity == "高" else "建议适当调整" if severity == "中" else "可以正常使用"} ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 🎨 设计理念 本审核系统融合中华文化与现代设计,体现内敛典雅之美。 采用汉语新解风格,注重文字的准确性和表达的规范性。 ╔══════════════════════════════════════════════════════════════╗ ║ 审核完成 - 感谢使用 ContentGuardian 内容审核智能体 ║ ╚══════════════════════════════════════════════════════════════╝ """ return report # Create Gradio interface optimized for HF Spaces demo = gr.Interface( fn=comprehensive_text_audit, inputs=[ gr.Textbox( label="Text to Audit", placeholder="Please enter the text content to be audited...", lines=8 ), gr.Textbox( label="Keywords (Optional)", placeholder="Please enter keywords to mark, separated by commas", lines=2 ) ], outputs=gr.Textbox( label="Audit Report", lines=25, max_lines=30 ), title="🛡️ ContentGuardian - Content Audit Agent", description=""" **China · Simplified Chinese · Hanyu Xinxie Style · Text-based Version** This system uses standardized structured text output, integrating Chinese culture with modern design concepts. Detects inappropriate advertising language, marks specified keywords, and generates detailed audit reports. **Key Features:** - 🚫 Inappropriate Advertising Language Detection (32+ common violation words) - 🔍 Precise Keyword Marking - 📊 Structured Audit Reports - 🎨 Hanyu Xinxie Aesthetic Style """, examples=[ ["This product is absolutely effective, completely side-effect free, the first brand! Buy immediately, instant results!", "product,effect"], ["Our product quality is very good, trustworthy, welcome to purchase.", "product,quality"], ["Buy now, instant effect, 100% effective, absolutely satisfying!", "buy,effect"], ["这款产品效果绝对好,完全无副作用,第一品牌!立即购买,马上见效!", "产品,效果"] ], theme=gr.themes.Soft(), flagging_mode="never" ) if __name__ == "__main__": demo.launch()