Update app.py

app.py CHANGED
@@ -12,53 +12,62 @@ from time import sleep
 import tiktoken
 import asyncio  # added for asynchronous processing
 
-#
-#
+# requirements.txt file needed:
+# optimum[openvino]
+# transformers
+# streamlit
+# tiktoken
+# asyncio
+
+# encoding used for token counting
 encoding = tiktoken.get_encoding("cl100k_base")
 
-…
+# model name and model ID (variables unified)
+model_name = "Gemma2-2B-it"
+model_id = "AIFunOver/gemma-2-2b-it-openvino-4bit"  # Hugging Face Hub model ID
 
-
-# Set the webpage title
+# basic webpage settings
 st.set_page_config(
-    page_title=f"Your LocalGPT ✨ with {
+    page_title=f"Your LocalGPT ✨ with {model_name}",
     page_icon="🌟",
     layout="wide")
 
+# initialize session state (kept across Hugging Face Space reruns)
 if "hf_model" not in st.session_state:
-    st.session_state.hf_model =
-# Initialize chat history for the LLM
+    st.session_state.hf_model = model_name
 if "messages" not in st.session_state:
     st.session_state.messages = []
-
-# Initialize the ChatMEssages for visualization only
 if "chatMessages" not in st.session_state:
     st.session_state.chatMessages = []
-
 if "repeat" not in st.session_state:
     st.session_state.repeat = 1.35
-
 if "temperature" not in st.session_state:
     st.session_state.temperature = 0.1
-
 if "maxlength" not in st.session_state:
     st.session_state.maxlength = 500
-
 if "speed" not in st.session_state:
     st.session_state.speed = 0.0
-
 if "numOfTurns" not in st.session_state:
    st.session_state.numOfTurns = 0
-
 if "maxTurns" not in st.session_state:
-    st.session_state.maxTurns = 5 #must be odd number, greater than equal to 5
+    st.session_state.maxTurns = 5  # must be an odd number, greater than or equal to 5
+if "logfilename" not in st.session_state:
+    ## Logger file
+    logfile = f'logs/Gemma2-2B_{genRANstring(5)}_log.txt'  # saved in the logs folder at the Space root
+    st.session_state.logfilename = logfile
+    # Write in the history the first 2 sessions
+    writehistory(st.session_state.logfilename, f'{str(datetime.datetime.now())}\n\nYour own LocalGPT with 🌟 {model_name}\n---\n🧠🫡: You are a helpful assistant.')
+    writehistory(st.session_state.logfilename, f'🌟: How may I help you today?')
+
 
 def writehistory(filename,text):
-    with open(filename, 'a', encoding='utf-8') as f:
-        f.write(text)
-        f.write('\n')
-    f.close()
+    try:
+        with open(filename, 'a', encoding='utf-8') as f:
+            f.write(text)
+            f.write('\n')
+        f.close()
+    except Exception as e:
+        print(f"Error writing to log file: {e}")  # log the error to the console
 
 def genRANstring(n):
     """
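Note on this hunk: the new `if "logfilename"` block runs at module import time, but it calls `genRANstring()` and `writehistory()`, which are only defined further down the file, so the first run of the Space would stop with a `NameError`. A minimal reordering sketch; the body of `genRANstring` is not shown in the diff, so the implementation below is an assumption based on its name, and the `os.makedirs` call is likewise an addition so that the `logs/` folder exists on a fresh Space:

```python
import datetime
import os
import random
import string

def genRANstring(n):
    # assumed implementation: n random alphanumeric characters for the log name
    return ''.join(random.choices(string.ascii_letters + string.digits, k=n))

def writehistory(filename, text):
    # append one entry to the chat log; report failures on the console only
    try:
        with open(filename, 'a', encoding='utf-8') as f:
            f.write(text)
            f.write('\n')
    except Exception as e:
        print(f"Error writing to log file: {e}")

# helpers first, then the session log file that depends on them
os.makedirs('logs', exist_ok=True)  # assumption: create the folder if missing
logfile = f'logs/Gemma2-2B_{genRANstring(5)}_log.txt'
writehistory(logfile, f'{datetime.datetime.now()}\n\n🧠🫡: You are a helpful assistant.')
```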
@@ -72,6 +81,7 @@ def genRANstring(n):
 
 @st.cache_resource
 def create_chat():
+    try:
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     ov_model = OVModelForCausalLM.from_pretrained(
         model_id = model_id,
@@ -82,6 +92,9 @@ def create_chat():
     #Credit to https://github.com/openvino-dev-samples/chatglm3.openvino/blob/main/chat.py
     streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
     return tokenizer, ov_model, streamer
+    except Exception as e:
+        st.error(f"Error loading model: {e}")
+        return None, None, None  # Return None values to indicate failure
 
 @st.cache_resource
 def countTokens(text):
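As committed, `create_chat()` gains a `try:` before its body and an `except` after it, but the original lines in between keep their old indentation, which Python rejects with an `IndentationError`. Below is a sketch under that reading, with the body indented one level; the extra `from_pretrained` keyword arguments hidden in the collapsed lines (old lines 78-81) are omitted. As an aside, `@st.cache_resource` fits a model handle like this, while for `countTokens`, which just returns an `int`, `st.cache_data` would be the more natural choice.

```python
import streamlit as st
from optimum.intel import OVModelForCausalLM
from transformers import AutoTokenizer, TextIteratorStreamer

model_id = "AIFunOver/gemma-2-2b-it-openvino-4bit"

@st.cache_resource
def create_chat():
    try:
        # body indented under try:, unlike the raw hunk above
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        ov_model = OVModelForCausalLM.from_pretrained(model_id)
        # Credit to https://github.com/openvino-dev-samples/chatglm3.openvino/blob/main/chat.py
        streamer = TextIteratorStreamer(tokenizer, timeout=60.0,
                                        skip_prompt=True, skip_special_tokens=True)
        return tokenizer, ov_model, streamer
    except Exception as e:
        st.error(f"Error loading model: {e}")
        return None, None, None
```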
@@ -89,145 +102,137 @@
     numoftokens = len(encoding.encode(text))
     return numoftokens
 
-# create THE SESSIoN STATES
-if "logfilename" not in st.session_state:
-    ## Logger file
-    logfile = f'logs/Gemma2-2B_{genRANstring(5)}_log.txt'
-    st.session_state.logfilename = logfile
-    #Write in the history the first 2 sessions
-    writehistory(st.session_state.logfilename,f'{str(datetime.datetime.now())}\n\nYour own LocalGPT with 🌟 {modelname}\n---\n🧠🫡: You are a helpful assistant.')
-    writehistory(st.session_state.logfilename,f'🌟: How may I help you today?')
-
 
-#AVATARS
-av_us =
-av_ass =
+#AVATARS - using emojis instead of images
+av_us = "👤"  # user avatar emoji
+av_ass = "🤖"  # assistant avatar emoji
 nCTX = 8192
 ### START STREAMLIT UI
-# Create a header element
-st.
-st.markdown(
-st.markdown(f'#### Powered by OpenVINO')
+# Create a header element - using markdown instead of an image
+st.header(f"🌟 {model_name} Chatbot")
+st.markdown(f"> *🌟 {model_name} with {nCTX} tokens Context window* - Turn based Chat available with max capacity of :orange[**{st.session_state.maxTurns} messages**].", unsafe_allow_html=True)
+st.markdown(f"#### Powered by OpenVINO")
 
-# CREATE THE SIDEBAR
+# CREATE THE SIDEBAR - using markdown and text instead of images
 with st.sidebar:
-    st.
+    st.subheader("Configuration")  # sidebar header
+    # st.image('images/banner.png', use_column_width=True)  # removed image
+    st.markdown("---")
+    st.markdown("**Model Parameters**")
     st.session_state.temperature = st.slider('Temperature:', min_value=0.0, max_value=1.0, value=0.65, step=0.01)
     st.session_state.maxlength = st.slider('Length reply:', min_value=150, max_value=2000,
                                            value=550, step=50)
     st.session_state.repeat = st.slider('Repeat Penalty:', min_value=0.0, max_value=2.0, value=1.176, step=0.02)
+    st.markdown("---")
+    st.markdown("**Chat Options**")
     st.session_state.turns = st.toggle('Turn based', value=False, help='Activate Conversational Turn Chat with History',
                                        disabled=False, label_visibility="visible")
     st.markdown(f"*Number of Max Turns*: {st.session_state.maxTurns}")
     actualTurns = st.markdown(f"*Chat History Length*: :green[Good]")
     statspeed = st.markdown(f'💫 speed: {st.session_state.speed} t/s')
     btnClear = st.button("Clear History",type="primary", use_container_width=True)
+    st.markdown("---")
+    st.markdown("**Logs**")
     st.markdown(f"**Logfile**: {st.session_state.logfilename}")
 
-tokenizer,ov_model,streamer = create_chat()
-#
-st.
-st.
-…
+tokenizer, ov_model, streamer = create_chat()
+
+if tokenizer and ov_model and streamer:  # only proceed if model loading was successful
+    # Display chat messages from history on app rerun
+    for message in st.session_state.chatMessages:
+        if message["role"] == "user":
+            with st.chat_message(message["role"], avatar=av_us):
+                st.markdown(message["content"])
+        else:
+            with st.chat_message(message["role"], avatar=av_ass):
+                st.markdown(message["content"])
+
+    # Accept user input using text_area and form for more dynamic updates
+    with st.form(key='chat_form', clear_on_submit=False):  # clear_on_submit=False matters! keep the form contents; submit button removed
+        myprompt = st.text_area("What is an AI model?", key="prompt_input", height=100)  # using text_area
+
+    if myprompt:  # when myprompt is set (i.e. the text_area content changed)
+        # Add user message to chat history
+        st.session_state.messages.append({"role": "user", "content": myprompt})
+        st.session_state.chatMessages.append({"role": "user", "content": myprompt})
+        st.session_state.numOfTurns = len(st.session_state.messages)
+        # Display user message in chat message container
+        with st.chat_message("user", avatar=av_us):
+            st.markdown(myprompt)
+        usertext = f"user: {myprompt}"
+        writehistory(st.session_state.logfilename, usertext)
+        # Display assistant response in chat message container
+        with st.chat_message("assistant", avatar=av_ass):
+            message_placeholder = st.empty()
+            with st.spinner("Thinking..."):
+                start = datetime.datetime.now()
+                response = ''
+                conv_messages = []
+                if st.session_state.turns:
+                    if st.session_state.numOfTurns > st.session_state.maxTurns:
+                        conv_messages = st.session_state.messages[-st.session_state.maxTurns:]
+                        actualTurns.markdown(f"*Chat History Length*: :red[Trimmed]")
+                    else:
+                        conv_messages = st.session_state.messages
                 else:
-    conv_messages
-…
+                    conv_messages.append(st.session_state.messages[-1])
+
+                full_response = ""
+                model_inputs = tokenizer.apply_chat_template(conv_messages,
+                                                             add_generation_prompt=True,
+                                                             tokenize=True,
+                                                             return_tensors="pt")
+                generate_kwargs = dict(input_ids=model_inputs,
+                                       max_new_tokens=st.session_state.maxlength,
+                                       temperature=st.session_state.temperature,
+                                       do_sample=True,
+                                       top_p=0.5,
+                                       repetition_penalty=st.session_state.repeat,
+                                       streamer=streamer)
+
+                # run the model generation asynchronously (using asyncio)
+                async def generate_response():
+                    t1 = Thread(target=ov_model.generate, kwargs=generate_kwargs)
+                    t1.start()
+                    start_time = datetime.datetime.now()
+                    partial_text = ""
+                    first_token = 0
+                    for chunk in streamer:
+                        if first_token == 0:
+                            ttft = datetime.datetime.now() - start_time
+                            first_token = 1
+                        for char in chunk:
+                            partial_text += char
+                            message_placeholder.markdown(partial_text + "🟡")
+                            sleep(0.005)  # faster typewriter effect (lowered to 0.005 s; adjust as needed)
+                        full_response += chunk
+
+                    delta_time = datetime.datetime.now() - start_time
+                    total_seconds = delta_time.total_seconds()
+                    prompt_tokens = len(encoding.encode(myprompt))
+                    assistant_tokens = len(encoding.encode(full_response))
+                    total_tokens = prompt_tokens + assistant_tokens
+                    st.session_state.speed = total_tokens / total_seconds
+                    statspeed.markdown(f'💫 speed: {st.session_state.speed:.2f} t/s')
 
                 delta_time = datetime.datetime.now() - start_time
-                total_seconds = delta_time.total_seconds()
                 prompt_tokens = len(encoding.encode(myprompt))
                 assistant_tokens = len(encoding.encode(full_response))
-…
-            💫 speed: {st.session_state.speed:.3f} t/s
-            🕒 time to first token: {ttfseconds:.2f} seconds
-            ```"""
-            message_placeholder.markdown(toregister)
-            asstext = f"assistant: {toregister}"
-            writehistory(st.session_state.logfilename, asstext)
-            st.session_state.messages.append({"role": "assistant", "content": full_response})
-            st.session_state.chatMessages.append({"role": "assistant", "content": toregister})
-            st.session_state.numOfTurns = len(st.session_state.messages)
-
-            asyncio.run(generate_response())  # run the async function
+
+                message_placeholder.markdown(full_response)  # Display only the response, without stats
+                asstext = f"assistant: {full_response}"
+                writehistory(st.session_state.logfilename, asstext)
+                st.session_state.messages.append({"role": "assistant", "content": full_response})
+                st.session_state.chatMessages.append({"role": "assistant", "content": full_response})  # Store just the response
+                st.session_state.numOfTurns = len(st.session_state.messages)
+
+                asyncio.run(generate_response())  # run the async function
+
+    if btnClear:  # when the Clear History button is clicked
+        st.session_state.messages = []
+        st.session_state.chatMessages = []
+        st.session_state.numOfTurns = 0
+        st.rerun()  # rerun the Streamlit app
+else:
+    st.error("Model initialization failed. Please check the logs for details.")
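Two caveats in the input handling of this hunk. First, Streamlit requires every `st.form` to contain a `st.form_submit_button`; the translated comment says the submit button was removed on purpose, but as committed the form raises an exception when it renders. Second, gating on `if myprompt:` restarts generation on every rerun for as long as the text area is non-empty. A minimal sketch that keeps the form and addresses both points; the "Send" caption is an assumption:

```python
import streamlit as st

if "messages" not in st.session_state:
    st.session_state.messages = []

with st.form(key='chat_form', clear_on_submit=False):
    myprompt = st.text_area("What is an AI model?", key="prompt_input", height=100)
    # Streamlit refuses to render a form without a submit button
    submitted = st.form_submit_button("Send")

if submitted and myprompt:
    # only an explicit click starts a generation pass,
    # not every rerun with a non-empty text area
    st.session_state.messages.append({"role": "user", "content": myprompt})
```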
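On the turn-based path, the trimming to `st.session_state.messages[-st.session_state.maxTurns:]` pairs with the "must be odd number" comment from the first hunk. One plausible reading (not stated in the diff): the history alternates user/assistant and always ends with the fresh user message, so an odd-sized tail still begins with a user turn, which chat templates generally expect:

```python
# history alternates roles and ends with the pending user message
history = ["user", "assistant", "user", "assistant", "user", "assistant", "user"]
maxTurns = 5                   # odd, as the comment in the diff requires
window = history[-maxTurns:]
assert window[0] == "user"     # an odd window keeps a user turn first
assert window[-1] == "user"    # ...and the new user message last
```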