Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import string
|
3 |
+
import pandas as pd
|
4 |
+
from sklearn.model_selection import train_test_split
|
5 |
+
from sklearn.metrics import accuracy_score, confusion_matrix
|
6 |
+
from sklearn import feature_extraction, linear_model, model_selection, preprocessing
|
7 |
+
from sklearn.metrics import accuracy_score,precision_score
|
8 |
+
from sklearn.model_selection import train_test_split
|
9 |
+
from sklearn.pipeline import Pipeline
|
10 |
+
import matplotlib.pyplot as plt
|
11 |
+
import seaborn as sns
|
12 |
+
import nltk
|
13 |
+
from nltk.corpus import stopwords
|
14 |
+
from nltk.stem import PorterStemmer
|
15 |
+
from nltk.tokenize import sent_tokenize, word_tokenize
|
16 |
+
from wordcloud import WordCloud, STOPWORDS
|
17 |
+
from tokenizers import (
|
18 |
+
decoders,
|
19 |
+
models,
|
20 |
+
normalizers,
|
21 |
+
pre_tokenizers,
|
22 |
+
processors,
|
23 |
+
trainers,
|
24 |
+
Tokenizer,
|
25 |
+
)
|
26 |
+
import gc
|
27 |
+
import warnings
|
28 |
+
# Silence every warning raised by the imported libraries for this demo.
warnings.filterwarnings("ignore")

# Download the NLTK 'punkt' and 'stopwords' resources at start-up.
nltk.download('punkt')
nltk.download('stopwords')

import pickle

# Load the trained classifier from disk. The context manager closes the
# file handle as soon as unpickling finishes.
# NOTE(review): pickle executes arbitrary code while loading; only ever
# point this at a model file that you produced yourself.
with open("/content/fakenews.sav", 'rb') as model_file:
    model = pickle.load(model_file)

import gradio as gr
|
34 |
+
def predict(text):
    """Classify a single news text as fake or real.

    The input string is cleaned with ``wordpre``, lower-cased and tokenized
    with ``lower_and_tokenize``, lemmatized word by word, re-joined with
    spaces, encoded with ``tokenizer`` and fed to the pickled ``model``.

    Args:
        text: The news text to classify.

    Returns:
        "This may be a fake news." when the predicted class index is 1,
        otherwise "This may be a real news.".

    NOTE(review): ``wordpre``, ``lower_and_tokenize``, ``lemmatizer``,
    ``tokenizer``, ``max_seq_len`` and ``device`` are not defined in this
    file; they must be defined or imported before this function can run.
    """
    # torch is not imported at module level in this file, so import it here.
    import torch

    # Wrap the string in a one-row Series so the pandas-based preprocessing
    # helpers can be applied to it.
    series = pd.DataFrame([text], columns=["text"])["text"]
    series = series.apply(wordpre)
    series = lower_and_tokenize(series)
    series = series.apply(
        lambda words: [lemmatizer.lemmatize(word) for word in words]
    )
    series = series.apply(" ".join)

    # Tokenize and encode the single preprocessed text.
    encoded = tokenizer.batch_encode_plus(
        series.tolist(),
        max_length=max_seq_len,
        padding="max_length",
        truncation=True,
        return_token_type_ids=True,
        add_special_tokens=True,
    )
    input_ids = torch.tensor(encoded["input_ids"]).to(device)
    input_mask = torch.tensor(encoded["attention_mask"]).to(device)

    # Put the model that is actually used for inference into eval mode.
    # NOTE(review): presumably `model` is a torch module whose output exposes
    # `.logits` — confirm against how fakenews.sav was saved.
    model.eval()
    with torch.no_grad():
        # No labels are passed: a loss is not needed to make a prediction.
        out = model(input_ids, input_mask)

    # Exactly one text was encoded, so read the class index of row 0.
    predicted_class = int(torch.argmax(out.logits, dim=1)[0])
    if predicted_class == 1:
        return "This may be a fake news."
    return "This may be a real news."
|
102 |
+
# Build the web UI: one text input wired to `predict`, one text output that
# shows the returned verdict string. Gradio component shortcuts must name a
# real component, so the output is declared as "text".
demo = gr.Interface(
    fn=predict,
    inputs=["text"],
    outputs=["text"],
)

# Start the app and request a public share link.
demo.launch(share=True)
|