sercetexam9 committed
Commit 451f6fd · verified · 1 Parent(s): 6028d53

Create app.py

Files changed (1)
  1. app.py +107 -0
app.py ADDED
@@ -0,0 +1,107 @@
import re
import string
import gc
import pickle
import warnings

import pandas as pd
import torch
import nltk
import gradio as gr
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from torch.utils.data import TensorDataset, DataLoader, SequentialSampler
from transformers import AutoTokenizer  # assumed: batch_encode_plus below is the transformers tokenizer API

warnings.filterwarnings("ignore")
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

# Assumed inference settings; adjust to the values used when the model was trained.
max_seq_len = 128
batch_size = 1
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the pickled fake-news classifier and put it in evaluation mode.
model = pickle.load(open("/content/fakenews.sav", 'rb'))
model.to(device)
model.eval()

# Assumed tokenizer checkpoint; replace with the checkpoint the classifier was fine-tuned from.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words("english"))


def wordpre(text):
    """Basic text cleaning (assumed implementation): strip URLs, HTML tags, punctuation and digits."""
    text = re.sub(r"http\S+|www\.\S+", " ", text)
    text = re.sub(r"<.*?>", " ", text)
    text = text.translate(str.maketrans("", "", string.punctuation))
    text = re.sub(r"\d+", " ", text)
    return re.sub(r"\s+", " ", text).strip()


def lower_and_tokenize(series):
    """Lower-case, tokenize, and drop English stopwords (assumed implementation)."""
    return series.apply(
        lambda t: [w for w in word_tokenize(t.lower()) if w not in stop_words]
    )


def predict(text):
    # Apply the same preprocessing the model is expected to have seen during training.
    text = pd.DataFrame([text], columns=["text"])["text"]
    text = text.apply(wordpre)
    text = lower_and_tokenize(text)
    text = text.apply(lambda tokens: [lemmatizer.lemmatize(word) for word in tokens])
    text = text.apply(lambda tokens: ' '.join(tokens))

    # Tokenize and encode the input text.
    tokens_text = tokenizer.batch_encode_plus(
        text.tolist(),
        max_length=max_seq_len,
        padding="max_length",
        truncation=True,
        return_token_type_ids=True,
        add_special_tokens=True,
    )

    # Wrap the encoded input in tensors and a sequential DataLoader.
    text_seq = torch.tensor(tokens_text['input_ids'])
    text_mask = torch.tensor(tokens_text['attention_mask'])
    text_data = TensorDataset(text_seq, text_mask)
    text_sampler = SequentialSampler(text_data)
    text_dataloader = DataLoader(text_data, sampler=text_sampler, batch_size=batch_size)

    # Run the classifier and collect the predicted labels.
    y_pred = []
    for batch in text_dataloader:
        input_ids = batch[0].to(device)
        input_mask = batch[1].to(device)
        with torch.no_grad():
            out = model(input_ids, input_mask)
        pred = torch.argmax(out.logits, dim=1)
        y_pred.extend(pred.flatten().tolist())
        del input_ids, input_mask, out, pred
        gc.collect()

    # Label 1 is treated as "fake", label 0 as "real".
    if y_pred and y_pred[0] == 1:
        return "This may be fake news."
    return "This may be real news."


demo = gr.Interface(
    fn=predict,
    inputs=["text"],
    outputs=["text"],
)
demo.launch(share=True)
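
Once the app is running, the Interface above can also be called programmatically. A minimal sketch using the gradio_client package, assuming the app is reachable at http://127.0.0.1:7860 and exposes the default /predict endpoint that gr.Interface creates:

from gradio_client import Client

client = Client("http://127.0.0.1:7860")  # assumed local URL; use the Space URL once deployed
result = client.predict(
    "Example headline to classify",  # input text
    api_name="/predict",
)
print(result)  # "This may be fake news." or "This may be real news."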