Update app.py
app.py
CHANGED
@@ -2,10 +2,10 @@ from datasets import load_dataset
 from transformers import pipeline
 import gradio as gr
 
-# Load
+# Load dataset
 dataset = load_dataset("Koushim/processed-jigsaw-toxic-comments", split="train", streaming=True)
 
-#
+# Sample examples
 low, medium, high = [], [], []
 for example in dataset:
     score = example['toxicity']
@@ -20,46 +20,43 @@ for example in dataset:
         break
 
 examples_html = f"""
-###
+### 🧪 Examples of Toxicity Levels
 
-
+#### 🔷 Low Toxicity
 - {low[0][0]} (score: {low[0][1]:.2f})
 - {low[1][0]} (score: {low[1][1]:.2f})
 - {low[2][0]} (score: {low[2][1]:.2f})
 
-
+#### 🟠 Medium Toxicity
 - {medium[0][0]} (score: {medium[0][1]:.2f})
 - {medium[1][0]} (score: {medium[1][1]:.2f})
 - {medium[2][0]} (score: {medium[2][1]:.2f})
 
-
+#### 🔴 High Toxicity
 - {high[0][0]} (score: {high[0][1]:.2f})
 - {high[1][0]} (score: {high[1][1]:.2f})
 - {high[2][0]} (score: {high[2][1]:.2f})
 """
 
-# Load
-classifier = pipeline(
-    "text-classification",
-    model="cardiffnlp/twitter-roberta-base-offensive",
-    top_k=None
-)
+# Load toxicity detection pipeline
+classifier = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-offensive", top_k=None)
 
 def predict_toxicity(text):
-    preds = classifier(text)
-
+    preds = classifier(text)[0]
+    results = []
     for pred in preds:
-
-
+        label = pred['label']
+        score = pred['score']
+        results.append(f"**{label}**: {score:.2f}")
+    return "\n".join(results)
 
-# Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("# 🧹 Hate Speech & Toxicity Monitor")
     gr.Markdown("This tool shows examples of toxic comments and lets you check your own text for toxicity using a Hugging Face model.")
     gr.Markdown(examples_html)
 
     inp = gr.Textbox(label="🔷 Enter your comment")
-    out = gr.Markdown(label="Toxicity Scores")
+    out = gr.Markdown(label="🔷 Toxicity Scores")
     btn = gr.Button("Check Toxicity")
     btn.click(fn=predict_toxicity, inputs=inp, outputs=out)
 
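The hunk jumps from line 11 straight to the `break` on line 20, so the bucketing logic itself is not part of this change. Because the dataset is opened with `streaming=True`, it is an iterable that can be abandoned early, which is why the loop can `break` once enough samples are collected. A minimal sketch of what that elided block presumably does, where the 0.3/0.7 thresholds and the `comment_text` column name are assumptions (only the three-examples-per-bucket shape is implied by the `low[0..2]`/`medium[0..2]`/`high[0..2]` indexing later in the file):

```python
# Hypothetical reconstruction of the elided lines 12-19 of app.py:
# sort streamed comments into low/medium/high buckets by toxicity score
# and stop once each bucket holds three examples.
for example in dataset:
    score = example['toxicity']
    text = example['comment_text']  # assumed column name, not shown in the diff
    if score < 0.3:                 # assumed threshold
        bucket = low
    elif score < 0.7:               # assumed threshold
        bucket = medium
    else:
        bucket = high
    if len(bucket) < 3:
        bucket.append((text, score))
    if len(low) == 3 and len(medium) == 3 and len(high) == 3:
        break
```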
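On the classifier change: passing `top_k=None` asks the text-classification pipeline to return a score for every label instead of only the best one, and collapsing the call to a single line does not change behaviour. How the result is nested has varied across transformers releases (a single-string input may come back as a flat list of `{'label', 'score'}` dicts or wrapped in an outer list, which is what the new `classifier(text)[0]` assumes). A small, hedged formatting helper that tolerates either shape, equivalent in spirit to the new loop in `predict_toxicity`:

```python
def format_scores(raw):
    """Render pipeline output as Markdown lines, accepting flat or nested results."""
    preds = raw[0] if raw and isinstance(raw[0], list) else raw
    return "\n".join(f"**{p['label']}**: {p['score']:.2f}" for p in preds)

# Illustrative only (not real model output):
# format_scores([[{'label': 'offensive', 'score': 0.91},
#                 {'label': 'non-offensive', 'score': 0.09}]])
# -> '**offensive**: 0.91\n**non-offensive**: 0.09'
```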
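The hunk ends at line 62, so the tail of app.py is not shown. For a Gradio Space the script would normally finish by launching the Blocks app; assuming nothing else follows the code above, that final line would be:

```python
demo.launch()  # presumed final line, below the part of app.py covered by this diff
```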