aeresd committed on
Commit
b64c976
·
verified ·
1 Parent(s): 691ee4d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -77
app.py CHANGED
@@ -1,10 +1,6 @@
1
  from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
2
  import torch
3
  import streamlit as st
4
- from PIL import Image
5
- import pytesseract
6
- import pandas as pd
7
- import plotly.express as px
8
 
9
  # ✅ Step 1: Emoji 翻译模型(你自己训练的模型)
10
  emoji_model_id = "jenniferhk008/roberta-hfl-emoji-aug3epoch"
@@ -26,19 +22,26 @@ model_options = {
26
  # ✅ 页面配置
27
  st.set_page_config(page_title="Emoji Offensive Text Detector", page_icon="🚨", layout="wide")
28
 
29
- # ✅ 侧边栏:模型选择
30
- with st.sidebar:
31
- st.header("🧠 Configuration")
32
- selected_model = st.selectbox("Choose classification model", list(model_options.keys()))
33
- selected_model_id = model_options[selected_model]
34
- classifier = pipeline("text-classification", model=selected_model_id, device=0 if torch.cuda.is_available() else -1)
35
 
36
- # 初始化历史记录
37
- if "history" not in st.session_state:
38
- st.session_state.history = []
 
 
39
 
40
- # 分类函数
 
 
 
41
 
 
 
 
 
 
 
42
  def classify_emoji_text(text: str):
43
  prompt = f"输入:{text}\n输出:"
44
  input_ids = emoji_tokenizer(prompt, return_tensors="pt").to(emoji_model.device)
@@ -50,74 +53,21 @@ def classify_emoji_text(text: str):
50
  result = classifier(translated_text)[0]
51
  label = result["label"]
52
  score = result["score"]
53
- reasoning = f"The sentence was flagged as '{label}' due to potentially offensive phrases. Consider replacing emotionally charged, ambiguous, or abusive terms."
54
 
55
- st.session_state.history.append({"text": text, "translated": translated_text, "label": label, "score": score, "reason": reasoning})
56
- return translated_text, label, score, reasoning
57
 
58
- # 主页面:输入与分析共存
59
- st.title("🚨 Emoji Offensive Text Detector & Analysis Dashboard")
60
-
61
- # 文本输入
62
- st.subheader("1. 输入与分类")
63
- default_text = "你是🐷"
64
- text = st.text_area("Enter sentence with emojis:", value=default_text, height=150)
65
-
66
- if st.button("🚦 Analyze Text"):
67
  with st.spinner("🔍 Processing..."):
68
  try:
69
- translated, label, score, reason = classify_emoji_text(text)
70
- st.markdown("**Translated sentence:**")
71
  st.code(translated, language="text")
72
- st.markdown(f"**Prediction:** {label}")
73
- st.markdown(f"**Confidence Score:** {score:.2%}")
74
- st.markdown("**Model Explanation:**")
75
- st.info(reason)
76
- except Exception as e:
77
- st.error(f"❌ An error occurred:\n{e}")
78
 
79
- # 图片上传与 OCR
80
- st.markdown("---")
81
- st.subheader("2. Image OCR")
82
- uploaded_file = st.file_uploader("Upload an image (JPG/PNG)", type=["jpg","jpeg","png"])
83
- if uploaded_file:
84
- image = Image.open(uploaded_file)
85
- st.image(image, caption="Uploaded Screenshot", use_column_width=True)
86
- with st.spinner("🧠 Extracting text via OCR..."):
87
- ocr_text = pytesseract.image_to_string(image, lang="chi_sim+eng").strip()
88
- if ocr_text:
89
- st.markdown("**Extracted Text:**")
90
- st.code(ocr_text)
91
- translated, label, score, reason = classify_emoji_text(ocr_text)
92
- st.markdown("**Translated sentence:**")
93
- st.code(translated, language="text")
94
- st.markdown(f"**Prediction:** {label}")
95
- st.markdown(f"**Confidence Score:** {score:.2%}")
96
- st.markdown("**Model Explanation:**")
97
- st.info(reason)
98
- else:
99
- st.info("⚠️ No text detected in the image.")
100
 
101
- # 分析仪表盘
102
- st.markdown("---")
103
- st.subheader("3. Violation Analysis Dashboard")
104
- if st.session_state.history:
105
- # 展示历史记录
106
- df = pd.DataFrame(st.session_state.history)
107
- st.markdown("### 🧾 Offensive Terms & Suggestions")
108
- for item in st.session_state.history:
109
- st.markdown(f"- 🔹 **Input:** {item['text']}")
110
- st.markdown(f" - ✨ **Translated:** {item['translated']}")
111
- st.markdown(f" - ❗ **Label:** {item['label']} with **{item['score']:.2%}** confidence")
112
- st.markdown(f" - 🔧 **Suggestion:** {item['reason']}")
113
-
114
- # 雷达图
115
- radar_df = pd.DataFrame({
116
- "Category": ["Insult","Abuse","Discrimination","Hate Speech","Vulgarity"],
117
- "Score": [0.7,0.4,0.3,0.5,0.6]
118
- })
119
- radar_fig = px.line_polar(radar_df, r='Score', theta='Category', line_close=True, title="⚠️ Risk Radar by Category")
120
- radar_fig.update_traces(line_color='black')
121
- st.plotly_chart(radar_fig)
122
  else:
123
- st.info("⚠️ No classification data available yet.")
 
1
  from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
2
  import torch
3
  import streamlit as st
 
 
 
 
4
 
5
  # ✅ Step 1: Emoji 翻译模型(你自己训练的模型)
6
  emoji_model_id = "jenniferhk008/roberta-hfl-emoji-aug3epoch"
 
22
  # ✅ 页面配置
23
  st.set_page_config(page_title="Emoji Offensive Text Detector", page_icon="🚨", layout="wide")
24
 
25
+ # ✅ 页面标题
26
+ st.title("🧠 Emoji-based Offensive Language Classifier")
 
 
 
 
27
 
28
+ st.markdown("""
29
+ This application translates emojis in a sentence and classifies whether the final sentence is offensive or not using two AI models.
30
+ - The **first model** translates emoji or symbolic phrases into standard Chinese text.
31
+ - The **second model** performs offensive language detection.
32
+ """)
33
 
34
+ # Streamlit 侧边栏模型选择
35
+ selected_model = st.sidebar.selectbox("Choose classification model", list(model_options.keys()))
36
+ selected_model_id = model_options[selected_model]
37
+ classifier = pipeline("text-classification", model=selected_model_id, device=0 if torch.cuda.is_available() else -1)
38
 
39
+ # ✅ 输入区域
40
+ st.markdown("### ✍️ Input your sentence:")
41
+ default_text = "你是🐷"
42
+ text = st.text_area("Enter sentence with emojis:", value=default_text, height=150)
43
+
44
+ # ✅ 主逻辑封装函数
45
  def classify_emoji_text(text: str):
46
  prompt = f"输入:{text}\n输出:"
47
  input_ids = emoji_tokenizer(prompt, return_tensors="pt").to(emoji_model.device)
 
53
  result = classifier(translated_text)[0]
54
  label = result["label"]
55
  score = result["score"]
 
56
 
57
+ return translated_text, label, score
 
58
 
59
+ # ✅ 触发按钮
60
+ if st.button("🚦 Analyze"):
 
 
 
 
 
 
 
61
  with st.spinner("🔍 Processing..."):
62
  try:
63
+ translated, label, score = classify_emoji_text(text)
64
+ st.markdown("### 🔄 Translated sentence:")
65
  st.code(translated, language="text")
 
 
 
 
 
 
66
 
67
+ st.markdown(f"### 🎯 Prediction: `{label}`")
68
+ st.markdown(f"### 📊 Confidence Score: `{score:.2%}`")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
70
+ except Exception as e:
71
+ st.error(f"❌ An error occurred during processing:\n\n{e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  else:
73
+ st.info("👈 Please input text and click the button to classify.")