aaryan24 committed on
Commit
0cc2ad1
·
verified ·
1 Parent(s): bf1acc0

Upload 7 files

Browse files
Files changed (8) hide show
  1. .gitattributes +1 -0
  2. Dockerfile +34 -0
  3. app.py +103 -0
  4. hs_gru.h5 +3 -0
  5. hs_gru.keras +3 -0
  6. index.html +194 -0
  7. requirements.txt +7 -0
  8. tokenizerpkl_gru.pkl +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ hs_gru.keras filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image for the FastAPI hate-speech detection service (served by uvicorn on port 7860).
FROM python:3.10-slim

# Hugging Face cache location. TRANSFORMERS_CACHE is kept alongside HF_HOME for
# transformers releases that still read the older variable.
ENV HF_HOME=/code/hf_cache
ENV TRANSFORMERS_CACHE=/code/hf_cache
# Do not write .pyc files and do not buffer stdout/stderr, so logs appear immediately.
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1

# Create working directory
WORKDIR /code

# System dependencies. --no-install-recommends avoids pulling optional packages
# into the image; the apt lists are removed in the same layer to keep it small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        git \
        wget \
        build-essential \
    && rm -rf /var/lib/apt/lists/*

# Install Python packages first so this layer is cached across code-only changes.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt

# Copy app code
COPY . .

# Create the model cache directory and make it writable for any runtime user.
# The models are downloaded into it when the app starts; if the container is run
# as a non-root user (e.g. a hosting platform that forces a fixed UID), a
# root-owned directory would make those downloads fail.
RUN mkdir -p /code/hf_cache && chmod -R 777 /code/hf_cache

# Expose FastAPI port
EXPOSE 7860

# Start the app
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import logging
import pickle

import tensorflow as tf
import torch
from fastapi import FastAPI, Request, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import HTMLResponse
from fastapi.responses import JSONResponse
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
from tensorflow.keras.preprocessing.sequence import pad_sequences
from transformers import AutoTokenizer, AutoModelForSequenceClassification
12
def _load_sequence_classifier(checkpoint):
    """Load the tokenizer and sequence-classification model for ``checkpoint``.

    If the tokenizer has no pad token, a ``[PAD]`` token is registered. The
    model's embedding matrix is then resized to the tokenizer's vocabulary
    size so the two stay consistent.

    Returns a ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    if tokenizer.pad_token is None:
        tokenizer.add_special_tokens({'pad_token': '[PAD]'})
    model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    model.resize_token_embeddings(len(tokenizer))
    return tokenizer, model


# FastAPI application instance served by uvicorn.
app = FastAPI()

# GRU model and its tokenizer, both read from files shipped next to the app.
# NOTE: pickle.load executes arbitrary code from the file; this is only safe
# because the pickle is a bundled artifact, never user-supplied data.
gru_model = tf.keras.models.load_model('hs_gru.h5')
with open('tokenizerpkl_gru.pkl', 'rb') as f:
    gru_tokenizer = pickle.load(f)
# Length every GRU input sequence is padded to before prediction.
gru_maxlen = 100

# RoBERTa hate-speech classifier.
roberta_model_name = "facebook/roberta-hate-speech-dynabench-r4-target"
roberta_tokenizer, roberta_model = _load_sequence_classifier(roberta_model_name)

# ToxiGen RoBERTa classifier.
toxigen_model_name = "tomh/toxigen_roberta"
toxigen_tokenizer, toxigen_model = _load_sequence_classifier(toxigen_model_name)
37
+
38
# Enable CORS for any origin, method and header.
# Credentials are turned off: the CORS specification does not allow a wildcard
# origin to be combined with credentialed requests, and nothing in this file
# relies on cookies or authorization headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
46
+
47
+ # Mount static directory
48
+ # app.mount("/static", StaticFiles(directory="static"), name="static")
49
+
50
# Request body schema for POST /predict: a JSON object with one string
# field, ``text``, holding the text to classify.
class TextInput(BaseModel):
    text: str
53
+
54
@app.get("/", response_class=HTMLResponse)
def read_root():
    """Serve the single-page UI.

    Reads ``index.html`` from the current working directory on every request
    and returns its contents as the HTML response body.
    """
    # The page declares <meta charset="UTF-8">, so decode it explicitly as
    # UTF-8 instead of relying on the platform's default encoding.
    with open("index.html", "r", encoding="utf-8") as page:
        return page.read()
58
+
59
@app.get("/health")
def health_check():
    # Liveness probe: returns a fixed status payload and touches no model.
    status = {"message": "Hate Speech Detection API is running!"}
    return status
62
+
63
def _positive_class_probability(tokenizer, model, text):
    """Return the softmax probability of class index 1 for ``text``.

    NOTE(review): index 1 is treated as the "hate"/"toxic" class for both
    transformer checkpoints; confirm against each model's ``id2label``.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():  # inference only, no gradients needed
        logits = model(**encoded).logits
    probabilities = torch.nn.functional.softmax(logits, dim=1)
    return float(probabilities[0][1].item())


@app.post("/predict")
def predict_ensemble(input: TextInput):
    """Classify ``input.text`` with a weighted ensemble of three models.

    The GRU, RoBERTa and ToxiGen scores are combined with weights
    0.3 / 0.4 / 0.3; a combined score above 0.5 is labelled "Hate Speech".

    Returns a dict with ``gru_prob``, ``roberta_prob``, ``toxigen_prob`` and
    ``final_score`` (each rounded to 4 decimals) plus ``prediction``.
    On any failure a 500 JSON response with a generic ``detail`` is returned;
    the full traceback goes to the server log rather than to the client.
    """
    try:
        text = input.text

        # ----- GRU prediction -----
        sequences = gru_tokenizer.texts_to_sequences([text])
        padded = pad_sequences(sequences, maxlen=gru_maxlen, padding='post')
        # verbose=0 suppresses the Keras progress bar that would otherwise be
        # printed for every request.
        gru_prob = float(gru_model.predict(padded, verbose=0)[0][0])

        # ----- Transformer predictions -----
        roberta_prob = _positive_class_probability(roberta_tokenizer, roberta_model, text)
        toxigen_prob = _positive_class_probability(toxigen_tokenizer, toxigen_model, text)

        # ----- Weighted ensemble -----
        final_score = (0.3 * gru_prob) + (0.4 * roberta_prob) + (0.3 * toxigen_prob)
        label = "Hate Speech" if final_score > 0.5 else "Not Hate Speech"

        return {
            "gru_prob": round(gru_prob, 4),
            "roberta_prob": round(roberta_prob, 4),
            "toxigen_prob": round(toxigen_prob, 4),
            "final_score": round(final_score, 4),
            "prediction": label
        }

    except Exception:
        # Top-level request boundary: record the traceback server-side and do
        # not leak internal error text to the caller.
        logging.getLogger(__name__).exception("Error during prediction")
        return JSONResponse(
            status_code=500,
            content={"detail": "Internal error during prediction"},
        )
hs_gru.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d64d7840832a93d75389d7e15450bcf1914b6b5be1b23dd55543deb131c11528
3
+ size 26368672
hs_gru.keras ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7699a8f4fbe4194d72813727483e91cccb8b3ccdcf7d234a7811bff043c0d4e4
3
+ size 26367562
index.html ADDED
@@ -0,0 +1,194 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <title>Hate Speech Detection</title>
6
+ <style>
7
+ body {
8
+ font-family: Arial, sans-serif;
9
+ background: #f9f9f9;
10
+ color: #000;
11
+ margin: 0;
12
+ padding: 20px;
13
+ display: flex;
14
+ flex-direction: column;
15
+ align-items: center;
16
+ }
17
+
18
+ body.dark-mode {
19
+ background: #272626;
20
+ color: #fff;
21
+ }
22
+
23
+ h1 {
24
+ font-size: 40px;
25
+ margin-bottom: 10px;
26
+ }
27
+
28
+ .subtext {
29
+ font-size: 16px;
30
+ margin-bottom: 40px;
31
+ text-align: center;
32
+ max-width: 800px;
33
+ }
34
+
35
+ .container {
36
+ display: flex;
37
+ justify-content: center;
38
+ gap: 40px;
39
+ width: 100%;
40
+ max-width: 1000px;
41
+ }
42
+
43
+ .left-panel, .right-panel {
44
+ flex: 1;
45
+ display: flex;
46
+ flex-direction: column;
47
+ }
48
+
49
+ textarea {
50
+ width: 100%;
51
+ height: 140px;
52
+ padding: 10px;
53
+ font-size: 16px;
54
+ }
55
+
56
+ .buttons {
57
+ display: flex;
58
+ justify-content: space-between;
59
+ width: 100%;
60
+ margin-top: 10px;
61
+ }
62
+
63
+ .buttons button {
64
+ width: 49.5%;
65
+ padding: 12px;
66
+ font-size: 16px;
67
+ border: none;
68
+ border-radius: 4px;
69
+ cursor: pointer;
70
+ }
71
+
72
+ .submit-btn {
73
+ background: #4CAF50;
74
+ color: white;
75
+ }
76
+
77
+ .clear-btn {
78
+ background: #45a049;
79
+ color: white;
80
+ }
81
+
82
+ .share-btn {
83
+ width: 100%;
84
+ padding: 14px;
85
+ font-size: 16px;
86
+ background: #007BFF;
87
+ color: white;
88
+ border: none;
89
+ border-radius: 4px;
90
+ margin-top: auto;
91
+ }
92
+
93
+ .output {
94
+ background: white;
95
+ padding: 15px;
96
+ border-radius: 8px;
97
+ box-shadow: 0 0 10px rgba(0,0,0,0.1);
98
+ font-size: 16px;
99
+ min-height: 120px;
100
+ margin-bottom: 20px;
101
+ }
102
+
103
+ .dark-mode .output {
104
+ background: #1e1e1e;
105
+ color: #fff;
106
+ }
107
+
108
+ .mode-toggle {
109
+ margin-top: 60px;
110
+ font-size: 14px;
111
+ }
112
+ </style>
113
+ </head>
114
+ <body>
115
+ <h1 id="page-title">Hate Speech Detection</h1>
116
+ <p class="subtext">
117
+ This tool uses 3 models (GloVe-based Deep Learning (GRU), RoBERTa (Transformer), ToxiGen-RoBERTa (Transformer))
118
+ to classify hate speech using an ensemble method.
119
+ </p>
120
+
121
+ <div class="container">
122
+ <div class="left-panel">
123
+ <form id="hs-form">
124
+ <textarea id="text-input" placeholder="Enter text here..."></textarea>
125
+ <div class="buttons">
126
+ <button type="button" class="clear-btn" onclick="clearText()">Clear Text</button>
127
+ <button type="submit" class="submit-btn">Submit</button>
128
+ </div>
129
+ </form>
130
+ </div>
131
+
132
+ <div class="right-panel">
133
+ <div class="output" id="output" style="display: none;"></div>
134
+ <button class="share-btn" onclick="shareText()">Share via Link</button>
135
+ </div>
136
+ </div>
137
+
138
+ <div class="mode-toggle">
139
+ <label><input type="checkbox" id="mode-toggle"> Dark Mode</label>
140
+ </div>
141
+
142
+ <script>
143
+ const outputDiv = document.getElementById("output");
144
+ const pageTitle = document.getElementById("page-title");
145
+
146
// Submit handler: POST the entered text to /predict and render the three
// per-model scores plus the ensemble prediction in the output panel.
document.getElementById("hs-form").addEventListener("submit", async function (e) {
  e.preventDefault();
  const text = document.getElementById("text-input").value;

  try {
    const response = await fetch("/predict", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ text: text }),
    });

    // fetch() only rejects on network failure; HTTP errors must be checked
    // explicitly, otherwise the panel would show "undefined" for every field.
    if (!response.ok) {
      throw new Error(`Request failed with status ${response.status}`);
    }

    const result = await response.json();
    const rows = [
      ["Model 1:", result.gru_prob],
      ["Model 2:", result.roberta_prob],
      ["Model 3:", result.toxigen_prob],
      ["Prediction:", result.prediction],
    ];

    // Build the output with DOM nodes and text so response values are never
    // interpreted as HTML.
    outputDiv.replaceChildren();
    rows.forEach(([label, value], index) => {
      if (index > 0) {
        outputDiv.appendChild(document.createElement("br"));
      }
      const strong = document.createElement("strong");
      strong.textContent = label;
      outputDiv.append(strong, ` ${value}`);
    });
  } catch (err) {
    console.error(err);
    outputDiv.textContent = "Prediction failed. Please try again.";
  }

  outputDiv.style.display = "block";
});
165
+
166
// Reset the form: empty the textarea, then empty and hide the result panel.
function clearText() {
  const inputBox = document.getElementById("text-input");
  inputBox.value = "";
  outputDiv.innerHTML = "";
  outputDiv.style.display = "none";
}
171
+
172
// Copy a link to this page that carries the current text in its "text"
// query parameter.
async function shareText() {
  const text = document.getElementById("text-input").value;

  // Set the parameter through the URL API instead of appending "?text=..." to
  // href: the page may already have a query string (e.g. it was opened from a
  // shared link) or a hash, and plain concatenation would corrupt the URL.
  const url = new URL(window.location.href);
  url.searchParams.set("text", text);
  const shareUrl = url.toString();

  try {
    // writeText() returns a promise; only report success once it resolves.
    await navigator.clipboard.writeText(shareUrl);
    alert("Sharable link copied to clipboard!");
  } catch (err) {
    // Clipboard API unavailable or permission denied: let the user copy manually.
    window.prompt("Copy this link:", shareUrl);
  }
}
178
+
179
// Dark-mode switch: mirror the checkbox state onto <body> and the title colour.
const darkModeCheckbox = document.getElementById("mode-toggle");
darkModeCheckbox.addEventListener("change", (event) => {
  const darkEnabled = event.target.checked;
  document.body.classList.toggle("dark-mode", darkEnabled);
  if (darkEnabled) {
    pageTitle.style.color = "white";
  } else {
    pageTitle.style.color = "black";
  }
});
183
+
184
// On load: pre-fill the textarea from the "text" query parameter, if present.
window.onload = function () {
  const sharedText = new URLSearchParams(window.location.search).get("text");
  if (sharedText) {
    // URLSearchParams.get() already returns the percent-decoded value.
    // Decoding it a second time would throw URIError on text containing a
    // literal "%" or silently corrupt sequences such as "%41".
    document.getElementById("text-input").value = sharedText;
  }
};
192
+ </script>
193
+ </body>
194
+ </html>
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ transformers
4
+ torch
5
+ tensorflow
6
+ pydantic
7
+ python-multipart
tokenizerpkl_gru.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f933c6a038133f7d49d3cda5e5e7a1c930813dafdde2ddd0f6dfc0037d2c8108
3
+ size 5651170