Go-Raw committed
Commit ad87d66 · verified · 1 Parent(s): abde2fa

Update app.py

Files changed (1)
  1. app.py +15 -18
app.py CHANGED
@@ -2,10 +2,10 @@ from datasets import load_dataset
 from transformers import pipeline
 import gradio as gr

-# Load Jigsaw dataset (streaming for large size)
+# Load dataset
 dataset = load_dataset("Koushim/processed-jigsaw-toxic-comments", split="train", streaming=True)

-# Fetch sample comments with low, medium, high toxicity
+# Sample examples
 low, medium, high = [], [], []
 for example in dataset:
     score = example['toxicity']
@@ -20,46 +20,43 @@ for example in dataset:
         break

 examples_html = f"""
-### 🔷 Examples of Toxicity Levels
+### 🧪 Examples of Toxicity Levels

-**🟢 Low Toxicity**
+#### 🔷 Low Toxicity
 - {low[0][0]} (score: {low[0][1]:.2f})
 - {low[1][0]} (score: {low[1][1]:.2f})
 - {low[2][0]} (score: {low[2][1]:.2f})

-**🟡 Medium Toxicity**
+#### 🟠 Medium Toxicity
 - {medium[0][0]} (score: {medium[0][1]:.2f})
 - {medium[1][0]} (score: {medium[1][1]:.2f})
 - {medium[2][0]} (score: {medium[2][1]:.2f})

-**🔴 High Toxicity**
+#### 🔴 High Toxicity
 - {high[0][0]} (score: {high[0][1]:.2f})
 - {high[1][0]} (score: {high[1][1]:.2f})
 - {high[2][0]} (score: {high[2][1]:.2f})
 """

-# Load a toxicity/offensive detection pipeline
-classifier = pipeline(
-    "text-classification",
-    model="cardiffnlp/twitter-roberta-base-offensive",
-    top_k=None
-)
+# Load toxicity detection pipeline
+classifier = pipeline("text-classification", model="cardiffnlp/twitter-roberta-base-offensive", top_k=None)

 def predict_toxicity(text):
-    preds = classifier(text)
-    result_str = ""
+    preds = classifier(text)[0]
+    results = []
     for pred in preds:
-        result_str += f"**{pred['label']}**: {pred['score']:.2f}\n"
-    return result_str.strip()
+        label = pred['label']
+        score = pred['score']
+        results.append(f"**{label}**: {score:.2f}")
+    return "\n".join(results)

-# Gradio UI
 with gr.Blocks() as demo:
     gr.Markdown("# 🧹 Hate Speech & Toxicity Monitor")
     gr.Markdown("This tool shows examples of toxic comments and lets you check your own text for toxicity using a Hugging Face model.")
     gr.Markdown(examples_html)

     inp = gr.Textbox(label="🔷 Enter your comment")
-    out = gr.Markdown(label="Toxicity Scores")
+    out = gr.Markdown(label="🔷 Toxicity Scores")
     btn = gr.Button("Check Toxicity")
     btn.click(fn=predict_toxicity, inputs=inp, outputs=out)
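For reference, the substantive change is the prediction path: the classifier is built with top_k=None so scores for every label are returned, and classifier(text)[0] picks out the per-label scores for the single input string before they are formatted as Markdown. A minimal standalone sketch of that path, using the same model as app.py; the sample input and the __main__ guard are illustrative only, and the label names (e.g. offensive / non-offensive) come from the model's config:

```python
from transformers import pipeline

# Same pipeline configuration as the updated app.py.
classifier = pipeline(
    "text-classification",
    model="cardiffnlp/twitter-roberta-base-offensive",
    top_k=None,
)

def predict_toxicity(text):
    # With top_k=None the pipeline returns scores for every label;
    # for a single input string they sit in the first element of the result.
    preds = classifier(text)[0]
    return "\n".join(f"**{p['label']}**: {p['score']:.2f}" for p in preds)

if __name__ == "__main__":
    # Illustrative input only; prints one "**label**: score" line per label.
    print(predict_toxicity("Have a nice day!"))
```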