Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -2,127 +2,74 @@ import streamlit as st
|
|
2 |
import random
|
3 |
import openai
|
4 |
import joblib
|
5 |
-
import re
|
6 |
-
from nltk.corpus import stopwords
|
7 |
-
from nltk.tokenize import word_tokenize
|
8 |
-
import nltk
|
9 |
-
|
10 |
-
# Fetch the NLTK resources used for Arabic stop-word filtering and tokenizing.
nltk.download('stopwords')
nltk.download('punkt')

StopWords = set(stopwords.words('arabic'))

# Read the OpenAI API key from the environment instead of hard-coding it.
# A key committed to the repository is readable by anyone who can see the
# source; the key that used to be written here should be revoked.
import os  # imported here so this setup block stays self-contained

openai.api_key = os.environ.get("OPENAI_API_KEY")

# Load the pipeline
pipeLine = joblib.load('model_pipeline.joblib')
|
|
|
|
|
20 |
|
21 |
-
#
|
22 |
-
class TextPreprocessor:
    """Clean Arabic text before it is handed to the classifier.

    ``preprocess_text`` strips punctuation/special characters, emoji and
    Arabic diacritics, tokenizes the result and drops Arabic stop words.
    """

    def __init__(self):
        # Arabic stop words from the NLTK corpus, as a set for fast lookup.
        self.StopWords = set(stopwords.words('arabic'))

        # Verbose pattern: whitespace and "#" comments inside it are ignored.
        self.ArabicDiacritics = re.compile(r"""
                                     ّ    | # Tashdid
                                     َ    | # Fatha
                                     ً    | # Tanwin Fath
                                     ُ    | # Damma
                                     ٌ    | # Tanwin Damm
                                     ِ    | # Kasra
                                     ٍ    | # Tanwin Kasr
                                     ْ    | # Sukun
                                     ـ     # Tatwil/Kashida
                                 """, re.VERBOSE)

        # One character class covering four emoji code-point ranges.
        emoji_class = (
            r"[\U0001F600-\U0001F64F"   # emoticons
            r"\U0001F300-\U0001F5FF"    # symbols & pictographs
            r"\U0001F680-\U0001F6FF"    # transport & map symbols
            r"\U0001F1E0-\U0001F1FF]"   # flags (iOS)
        )
        self.RegrexPattern = re.compile(emoji_class, re.UNICODE)

    def preprocess_text(self, text):
        """Return *text* cleaned and with stop words removed.

        The surviving tokens are joined back together with single spaces.
        """
        # Drop everything that is neither a word character nor whitespace.
        cleaned = re.sub(r'[^\w\s]', '', text)

        # Drop emoji, then Arabic diacritics.
        cleaned = self.RegrexPattern.sub('', cleaned)
        cleaned = self.ArabicDiacritics.sub('', cleaned)

        stop_words = self.StopWords
        return ' '.join(
            token for token in word_tokenize(cleaned) if token not in stop_words
        )
|
56 |
-
|
57 |
-
preprocessor = TextPreprocessor()
|
58 |
-
|
59 |
category_mapping = {
|
60 |
-
0: '
|
61 |
1: 'Finance',
|
62 |
2: 'Medical',
|
63 |
-
3: '
|
64 |
4: 'Religion',
|
65 |
-
5: '
|
66 |
6: 'Tech'
|
67 |
}
|
68 |
-
|
69 |
-
def classify_article(article_text, pipeline):
    """Return the category name predicted for *article_text*.

    The text is cleaned with the module-level ``preprocessor``, passed to
    ``pipeline.predict`` as a one-element batch, and the first predicted
    label is looked up in ``category_mapping``. A label that is not in the
    mapping yields ``"Unknown"``.
    """
    cleaned = preprocessor.preprocess_text(article_text)
    label = pipeline.predict([cleaned])[0]
    if label in category_mapping:
        return category_mapping[label]
    return "Unknown"
|
74 |
-
|
75 |
def classification_page():
|
76 |
-
st.title("
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
category = classify_article(input_text, pipeLine)
|
87 |
-
st.write("### Predicted Category")
|
88 |
-
st.write(category)
|
89 |
else:
|
90 |
-
st.
|
91 |
-
|
92 |
-
# Function to generate summary using OpenAI
|
93 |
-
def generate_summary(text):
    """Send *text* to the OpenAI chat completion API and return the reply.

    The reply is the content of the first returned choice, with leading and
    trailing whitespace stripped.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant that summarizes text."},
        {"role": "user", "content": text},
    ]
    request_options = {
        "model": "gpt-3.5-turbo",
        "messages": conversation,
        "temperature": 0.7,
        "max_tokens": 150,
        "top_p": 1.0,
        "frequency_penalty": 0.0,
        "presence_penalty": 0.0,
    }
    completion = openai.ChatCompletion.create(**request_options)
    first_choice = completion.choices[0]
    return first_choice.message['content'].strip()
|
107 |
-
|
108 |
-
# Function for the summarization page
|
109 |
def summarization_page():
|
110 |
-
st.title("
|
111 |
-
st.write("Enter text below and click 'Summarize' to generate a summary.")
|
112 |
|
|
|
|
|
|
|
|
|
|
|
113 |
# Text input from user
|
114 |
-
input_text = st.text_area("
|
115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
# Button to trigger summarization
|
117 |
-
if st.button("
|
118 |
if input_text:
|
119 |
-
with st.spinner("
|
120 |
summary = generate_summary(input_text)
|
121 |
-
st.write("###
|
122 |
st.write(summary)
|
123 |
else:
|
124 |
st.warning("Please enter some text to summarize.")
|
125 |
|
|
|
126 |
def generate_questions(user_text):
|
127 |
questions = [
|
128 |
{
|
@@ -144,11 +91,11 @@ def generate_questions(user_text):
|
|
144 |
return questions
|
145 |
|
146 |
def quiz_page():
|
147 |
-
st.title("
|
148 |
|
149 |
-
user_text = st.text_area("
|
150 |
|
151 |
-
if st.button("
|
152 |
if user_text:
|
153 |
questions = generate_questions(user_text)
|
154 |
st.session_state.questions = questions
|
@@ -157,36 +104,36 @@ def quiz_page():
|
|
157 |
st.session_state.asked_questions = []
|
158 |
|
159 |
if 'questions' in st.session_state and len(st.session_state.questions) > 0:
|
160 |
-
if st.button("
|
161 |
if len(st.session_state.asked_questions) < len(st.session_state.questions):
|
162 |
available_questions = [q for q in st.session_state.questions if q not in st.session_state.asked_questions]
|
163 |
st.session_state.current_question = random.choice(available_questions)
|
164 |
st.session_state.asked_questions.append(st.session_state.current_question)
|
165 |
else:
|
166 |
-
st.write("
|
167 |
|
168 |
if st.session_state.current_question:
|
169 |
question = st.session_state.current_question
|
170 |
-
st.write(f"
|
171 |
-
user_answer = st.radio("
|
172 |
|
173 |
-
if st.button("
|
174 |
if user_answer == question['answer']:
|
175 |
st.session_state.score += 1
|
176 |
st.session_state.current_question = None
|
177 |
|
178 |
-
if st.button("
|
179 |
-
st.write(f"
|
180 |
st.session_state.score = 0
|
181 |
st.session_state.asked_questions = []
|
182 |
st.session_state.questions = []
|
183 |
|
184 |
# Add navigation
|
185 |
-
page = st.sidebar.selectbox("
|
186 |
|
187 |
-
if page == "
|
188 |
classification_page()
|
189 |
-
elif page == "
|
190 |
summarization_page()
|
191 |
else:
|
192 |
-
quiz_page()
|
|
|
2 |
import random
|
3 |
import openai
|
4 |
import joblib
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
|
6 |
# Load the model pipeline once. Unpickling the same file a second time only
# adds start-up time and memory.
model_pipeline = joblib.load('model_pipeline.joblib')

# ``pipeLine`` is the older name for the same pipeline; keep it as an alias of
# the single loaded object so any code still using that name keeps working.
pipeLine = model_pipeline
|
10 |
|
11 |
+
# Category mapping
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
# The position of each name in the list is the numeric label it stands for.
category_mapping = dict(enumerate([
    'ثقافة',
    'Finance',
    'Medical',
    'سياسة',
    'Religion',
    'رياضي',
    'Tech',
]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
def classification_page():
    """Render the classification page.

    Shows a title, a text area for the article and a button. When the button
    is pressed with non-blank text, the article is passed to
    ``model_pipeline`` and the mapped category name is written to the page;
    with blank text an error message is shown instead.
    """
    st.title("صفحة التصنيف")

    article = st.text_area("ادخل المقال هنا", height=150)

    # Nothing more to render until the button is pressed.
    if not st.button("صنّف"):
        return

    if not article.strip():
        st.error("Please enter an article to classify.")
        return

    # predict() takes a batch: send the one article and keep its single label.
    numeric_prediction = model_pipeline.predict([article])[0]
    category_prediction = category_mapping.get(numeric_prediction, "Unknown")
    st.write(f"**{category_prediction}** الصنف المتوقع : ")
|
34 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
def summarization_page():
    """Render the summarization page.

    Shows a title, a text area and a button. When the button is pressed with
    non-blank text, the text is sent to the OpenAI chat completion API and
    the returned summary is written to the page. The API key is read from
    the ``OPENAI_API_KEY`` environment variable; if it is not set, an error
    is shown and no request is made.
    """
    import os  # function-scope import keeps this block self-contained

    st.title("صفحة التلخيص")

    # Never hard-code the key: a secret committed to the repository is
    # readable by anyone who can see the source. The key that used to be
    # written here should be revoked and replaced by a configured secret.
    openai.api_key = os.environ.get("OPENAI_API_KEY")

    # Text input from user
    input_text = st.text_area("ادخل المقال هنا", height=200)

    def generate_summary(text):
        """Return the model's reply for *text*, stripped of outer whitespace."""
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",  # Default model
            messages=[
                {"role": "system", "content": "You are a helpful assistant that summarizes text."},
                {"role": "user", "content": text},
            ],
            temperature=0.7,  # Default temperature
            max_tokens=150,  # Default max tokens
            top_p=1.0,
            frequency_penalty=0.0,
            presence_penalty=0.0,
        )
        return response.choices[0].message['content'].strip()

    # Button to trigger summarization
    if st.button("لخّص"):
        if not openai.api_key:
            st.error(
                "OpenAI API key is not configured. "
                "Set the OPENAI_API_KEY environment variable."
            )
            return

        # Treat whitespace-only input as empty, as the classification page
        # does, so a blank request is never sent to the API.
        if input_text.strip():
            with st.spinner("إنشاء التلخيص"):
                summary = generate_summary(input_text)
                st.write("### الملخص ")
                st.write(summary)
        else:
            st.warning("Please enter some text to summarize.")
|
71 |
|
72 |
+
|
73 |
def generate_questions(user_text):
|
74 |
questions = [
|
75 |
{
|
|
|
91 |
return questions
|
92 |
|
93 |
def quiz_page():
|
94 |
+
st.title("صفحة الاختبار")
|
95 |
|
96 |
+
user_text = st.text_area("ادخل المقال هنا", height=150)
|
97 |
|
98 |
+
if st.button("أنشئ الأسئلة"):
|
99 |
if user_text:
|
100 |
questions = generate_questions(user_text)
|
101 |
st.session_state.questions = questions
|
|
|
104 |
st.session_state.asked_questions = []
|
105 |
|
106 |
if 'questions' in st.session_state and len(st.session_state.questions) > 0:
|
107 |
+
if st.button("اسأل"):
|
108 |
if len(st.session_state.asked_questions) < len(st.session_state.questions):
|
109 |
available_questions = [q for q in st.session_state.questions if q not in st.session_state.asked_questions]
|
110 |
st.session_state.current_question = random.choice(available_questions)
|
111 |
st.session_state.asked_questions.append(st.session_state.current_question)
|
112 |
else:
|
113 |
+
st.write("تم عرض جميع الأسئلة")
|
114 |
|
115 |
if st.session_state.current_question:
|
116 |
question = st.session_state.current_question
|
117 |
+
st.write(f"السؤال: {question['question']}")
|
118 |
+
user_answer = st.radio("اختر الإجابة", question['options'], key="answer")
|
119 |
|
120 |
+
if st.button("سلّم الإجابة"):
|
121 |
if user_answer == question['answer']:
|
122 |
st.session_state.score += 1
|
123 |
st.session_state.current_question = None
|
124 |
|
125 |
+
if st.button("إنهاء الاختبار"):
|
126 |
+
st.write(f"نتيجة الاختبار {st.session_state.score} من {len(st.session_state.asked_questions)}")
|
127 |
st.session_state.score = 0
|
128 |
st.session_state.asked_questions = []
|
129 |
st.session_state.questions = []
|
130 |
|
131 |
# Add navigation
page = st.sidebar.selectbox("اختر صفحة", ["التصنيف", "التلخيص", "الاختبار"])

# Route the sidebar choice to its page renderer. Any value other than the
# classification or summarization label falls through to the quiz page.
_page_renderers = {
    "التصنيف": classification_page,
    "التلخيص": summarization_page,
}
_page_renderers.get(page, quiz_page)()
|