kimhyunwoo committed
Commit 2dff707 · verified · 1 Parent(s): 491af54

Update app.py

Files changed (1): app.py (+147 −142)
app.py CHANGED
@@ -12,53 +12,62 @@ from time import sleep
 import tiktoken
 import asyncio  # asyncio added for asynchronous processing
 
-# for counting the tokens in the prompt and in the result
-#context_count = len(encoding.encode(yourtext))
 encoding = tiktoken.get_encoding("cl100k_base")
 
-modelname = "Gemma2-2B-it"
-model_id = "AIFunOver/gemma-2-2b-it-openvino-4bit"  # Updated model ID
 
-
-# Set the webpage title
 st.set_page_config(
-    page_title=f"Your LocalGPT ✨ with {modelname}",
     page_icon="🌟",
     layout="wide")
 
 if "hf_model" not in st.session_state:
-    st.session_state.hf_model = "Gemma2-2B-it"
-# Initialize chat history for the LLM
 if "messages" not in st.session_state:
     st.session_state.messages = []
-
-# Initialize the ChatMessages for visualization only
 if "chatMessages" not in st.session_state:
     st.session_state.chatMessages = []
-
 if "repeat" not in st.session_state:
     st.session_state.repeat = 1.35
-
 if "temperature" not in st.session_state:
     st.session_state.temperature = 0.1
-
 if "maxlength" not in st.session_state:
     st.session_state.maxlength = 500
-
 if "speed" not in st.session_state:
     st.session_state.speed = 0.0
-
 if "numOfTurns" not in st.session_state:
     st.session_state.numOfTurns = 0
-
 if "maxTurns" not in st.session_state:
-    st.session_state.maxTurns = 5  # must be an odd number, greater than or equal to 5
 
 def writehistory(filename, text):
-    with open(filename, 'a', encoding='utf-8') as f:
-        f.write(text)
-        f.write('\n')
-        f.close()
 
 def genRANstring(n):
     """
@@ -72,6 +81,7 @@ def genRANstring(n):
 
 @st.cache_resource
 def create_chat():
     tokenizer = AutoTokenizer.from_pretrained(model_id)
     ov_model = OVModelForCausalLM.from_pretrained(
         model_id = model_id,
@@ -82,6 +92,9 @@ def create_chat():
     # Credit to https://github.com/openvino-dev-samples/chatglm3.openvino/blob/main/chat.py
     streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
     return tokenizer, ov_model, streamer
 
 @st.cache_resource
 def countTokens(text):
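The hunks above show only the first argument of `OVModelForCausalLM.from_pretrained`; the remaining arguments fall outside the diff context. A minimal loader sketch with optimum-intel — the `device` and `ov_config` values here are illustrative assumptions, not the commit's actual settings:

```python
from optimum.intel.openvino import OVModelForCausalLM
from transformers import AutoTokenizer, TextIteratorStreamer

model_id = "AIFunOver/gemma-2-2b-it-openvino-4bit"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# device and ov_config are assumptions; the diff hides the real values.
ov_model = OVModelForCausalLM.from_pretrained(
    model_id,
    device="CPU",
    ov_config={"PERFORMANCE_HINT": "LATENCY"},
)
# Streamer settings as in the diff: skip the echoed prompt and special tokens.
streamer = TextIteratorStreamer(tokenizer, timeout=60.0,
                                skip_prompt=True, skip_special_tokens=True)
```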
@@ -89,145 +102,137 @@ def countTokens(text):
     numoftokens = len(encoding.encode(text))
     return numoftokens
 
-# create THE SESSION STATES
-if "logfilename" not in st.session_state:
-    ## Logger file
-    logfile = f'logs/Gemma2-2B_{genRANstring(5)}_log.txt'
-    st.session_state.logfilename = logfile
-    # Write in the history the first 2 sessions
-    writehistory(st.session_state.logfilename, f'{str(datetime.datetime.now())}\n\nYour own LocalGPT with 🌀 {modelname}\n---\n🧠🫡: You are a helpful assistant.')
-    writehistory(st.session_state.logfilename, f'🌀: How may I help you today?')
-
 
-# AVATARS
-av_us = 'images/user.png'    # './man.png' # "🦖" # A single emoji, e.g. "🧑‍💻", "🤖", "🦖". Shortcodes are not supported.
-av_ass = 'images/assistant2.png'  # './robot.png'
 nCTX = 8192
 ### START STREAMLIT UI
-# Create a header element
-st.image('images/Gemma-2-Banner.original.png', use_column_width=True)
-mytitle = f'> *🌟 {modelname} with {nCTX} tokens Context window* - Turn based Chat available with max capacity of :orange[**{st.session_state.maxTurns} messages**].'
-st.markdown(mytitle, unsafe_allow_html=True)
-st.markdown(f'#### Powered by OpenVINO')
 
-# CREATE THE SIDEBAR
 with st.sidebar:
-    st.image('images/banner.png', use_column_width=True)
     st.session_state.temperature = st.slider('Temperature:', min_value=0.0, max_value=1.0, value=0.65, step=0.01)
     st.session_state.maxlength = st.slider('Length reply:', min_value=150, max_value=2000,
                                            value=550, step=50)
     st.session_state.repeat = st.slider('Repeat Penalty:', min_value=0.0, max_value=2.0, value=1.176, step=0.02)
     st.session_state.turns = st.toggle('Turn based', value=False, help='Activate Conversational Turn Chat with History',
                                        disabled=False, label_visibility="visible")
     st.markdown(f"*Number of Max Turns*: {st.session_state.maxTurns}")
     actualTurns = st.markdown(f"*Chat History Length*: :green[Good]")
     statspeed = st.markdown(f'💫 speed: {st.session_state.speed} t/s')
     btnClear = st.button("Clear History", type="primary", use_container_width=True)
     st.markdown(f"**Logfile**: {st.session_state.logfilename}")
 
-tokenizer, ov_model, streamer = create_chat()
-
-# Display chat messages from history on app rerun
-for message in st.session_state.chatMessages:
-    if message["role"] == "user":
-        with st.chat_message(message["role"], avatar=av_us):
-            st.markdown(message["content"])
-    else:
-        with st.chat_message(message["role"], avatar=av_ass):
-            st.markdown(message["content"])
-
-# Accept user input using text_area and form for more dynamic updates
-with st.form(key='chat_form', clear_on_submit=False):  # clear_on_submit=False is important: it keeps the form content
-    myprompt = st.text_area("What is an AI model?", key="prompt_input", height=100)  # use text_area
-    col1, col2 = st.columns([0.8, 0.2])
-    with col2:
-        submit_button = st.form_submit_button(label='Send')  # the Send button can optionally be kept or removed
-
-if myprompt:  # runs when myprompt is set (when the text_area content changes)
-    # Add user message to chat history
-    st.session_state.messages.append({"role": "user", "content": myprompt})
-    st.session_state.chatMessages.append({"role": "user", "content": myprompt})
-    st.session_state.numOfTurns = len(st.session_state.messages)
-    # Display user message in chat message container
-    with st.chat_message("user", avatar=av_us):
-        st.markdown(myprompt)
-        usertext = f"user: {myprompt}"
-        writehistory(st.session_state.logfilename, usertext)
-    # Display assistant response in chat message container
-    with st.chat_message("assistant", avatar=av_ass):
-        message_placeholder = st.empty()
-        with st.spinner("Thinking..."):
-            start = datetime.datetime.now()
-            response = ''
-            conv_messages = []
-            if st.session_state.turns:
-                if st.session_state.numOfTurns > st.session_state.maxTurns:
-                    conv_messages = st.session_state.messages[-st.session_state.maxTurns:]
-                    actualTurns.markdown(f"*Chat History Length*: :red[Trimmed]")
                 else:
-                    conv_messages = st.session_state.messages
-            else:
-                conv_messages.append(st.session_state.messages[-1])
-
-            full_response = ""
-            model_inputs = tokenizer.apply_chat_template(conv_messages,
-                                                         add_generation_prompt=True,
-                                                         tokenize=True,
-                                                         return_tensors="pt")
-            generate_kwargs = dict(input_ids=model_inputs,
-                                   max_new_tokens=st.session_state.maxlength,
-                                   temperature=st.session_state.temperature,
-                                   do_sample=True,
-                                   top_p=0.5,
-                                   repetition_penalty=st.session_state.repeat,
-                                   streamer=streamer)
-
-            # Run model generation asynchronously (using asyncio)
-            async def generate_response():
-                t1 = Thread(target=ov_model.generate, kwargs=generate_kwargs)
-                t1.start()
-                start_time = datetime.datetime.now()
-                partial_text = ""
-                first_token = 0
-                for chunk in streamer:
-                    if first_token == 0:
-                        ttft = datetime.datetime.now() - start_time
-                        first_token = 1
-                    for char in chunk:
-                        partial_text += char
-                        message_placeholder.markdown(partial_text + "🟡")
-                        sleep(0.005)  # faster typing effect (reduced to 0.005 s)
-                    full_response += chunk
 
                delta_time = datetime.datetime.now() - start_time
-                total_seconds = delta_time.total_seconds()
                prompt_tokens = len(encoding.encode(myprompt))
                assistant_tokens = len(encoding.encode(full_response))
-                total_tokens = prompt_tokens + assistant_tokens
-                st.session_state.speed = total_tokens / total_seconds
-                statspeed.markdown(f'💫 speed: {st.session_state.speed:.2f} t/s')
-
-                delta_time = datetime.datetime.now() - start_time
-                total_seconds = delta_time.total_seconds()
-                ttf_seconds = ttft.total_seconds()
-                prompt_tokens = len(encoding.encode(myprompt))
-                assistant_tokens = len(encoding.encode(full_response))
-                total_tokens = prompt_tokens + assistant_tokens
-                st.session_state.speed = total_tokens / total_seconds
-                statspeed.markdown(f'💫 speed: {st.session_state.speed:.2f} t/s')
-                toregister = full_response + f"""
-                ```
-                🧾 prompt tokens: {prompt_tokens}
-                📈 generated tokens: {assistant_tokens}
-                ⏳ generation time: {delta_time}
-                💫 speed: {st.session_state.speed:.3f} t/s
-                🚀 time to first token: {ttf_seconds:.2f} seconds
-                ```"""
-                message_placeholder.markdown(toregister)
-                asstext = f"assistant: {toregister}"
-                writehistory(st.session_state.logfilename, asstext)
-                st.session_state.messages.append({"role": "assistant", "content": full_response})
-                st.session_state.chatMessages.append({"role": "assistant", "content": toregister})
-                st.session_state.numOfTurns = len(st.session_state.messages)
-
-            asyncio.run(generate_response())  # run the async function
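Both the deleted block above and its replacement later in the diff drive `ov_model.generate` from a background `Thread` and consume the `TextIteratorStreamer` on the main thread; the `async` wrapper never awaits anything, so `asyncio.run` adds no real concurrency here. Stripped of Streamlit, the underlying pattern looks roughly like this (the message content is a made-up example, and `tokenizer`, `ov_model`, `streamer` are assumed to come from `create_chat()`):

```python
from threading import Thread

messages = [{"role": "user", "content": "What is an AI model?"}]  # made-up example
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, tokenize=True, return_tensors="pt")

# generate() blocks, so it runs in a worker thread while the main thread
# pulls decoded text off the streamer as tokens arrive.
worker = Thread(target=ov_model.generate,
                kwargs=dict(input_ids=input_ids, max_new_tokens=100,
                            streamer=streamer))
worker.start()

full_response = ""
for chunk in streamer:        # iteration ends when generation finishes
    print(chunk, end="", flush=True)
    full_response += chunk
worker.join()
```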
 
 import tiktoken
 import asyncio  # asyncio added for asynchronous processing
 
+# A requirements.txt file is needed:
+# optimum[openvino]
+# transformers
+# streamlit
+# tiktoken
+# asyncio
+
+# Encoding used for counting tokens
 encoding = tiktoken.get_encoding("cl100k_base")
 
+# Model name and ID (variable names unified)
+model_name = "Gemma2-2B-it"
+model_id = "AIFunOver/gemma-2-2b-it-openvino-4bit"  # Hugging Face Hub model ID
 
+# Basic page setup
 st.set_page_config(
+    page_title=f"Your LocalGPT ✨ with {model_name}",
     page_icon="🌟",
     layout="wide")
 
+# Initialize the session state (persists across Hugging Face Space reruns)
 if "hf_model" not in st.session_state:
+    st.session_state.hf_model = model_name
 if "messages" not in st.session_state:
     st.session_state.messages = []
 if "chatMessages" not in st.session_state:
     st.session_state.chatMessages = []
 if "repeat" not in st.session_state:
     st.session_state.repeat = 1.35
 if "temperature" not in st.session_state:
     st.session_state.temperature = 0.1
 if "maxlength" not in st.session_state:
     st.session_state.maxlength = 500
 if "speed" not in st.session_state:
     st.session_state.speed = 0.0
 if "numOfTurns" not in st.session_state:
     st.session_state.numOfTurns = 0
 if "maxTurns" not in st.session_state:
+    st.session_state.maxTurns = 5  # must be an odd number, greater than or equal to 5
+if "logfilename" not in st.session_state:
+    ## Logger file
+    logfile = f'logs/Gemma2-2B_{genRANstring(5)}_log.txt'  # stored in the logs folder at the Space root
+    st.session_state.logfilename = logfile
+    # Write the first 2 entries into the history
+    writehistory(st.session_state.logfilename, f'{str(datetime.datetime.now())}\n\nYour own LocalGPT with 🌀 {model_name}\n---\n🧠🫡: You are a helpful assistant.')
+    writehistory(st.session_state.logfilename, f'🌀: How may I help you today?')
+
 
 def writehistory(filename, text):
+    try:
+        with open(filename, 'a', encoding='utf-8') as f:
+            f.write(text)
+            f.write('\n')
+    except Exception as e:
+        print(f"Error writing to log file: {e}")  # log the error to the console
 
 def genRANstring(n):
     """
 
 
 @st.cache_resource
 def create_chat():
+    try:
         tokenizer = AutoTokenizer.from_pretrained(model_id)
         ov_model = OVModelForCausalLM.from_pretrained(
             model_id = model_id,
 
         # Credit to https://github.com/openvino-dev-samples/chatglm3.openvino/blob/main/chat.py
         streamer = TextIteratorStreamer(tokenizer, timeout=60.0, skip_prompt=True, skip_special_tokens=True)
         return tokenizer, ov_model, streamer
+    except Exception as e:
+        st.error(f"Error loading model: {e}")
+        return None, None, None  # return None values to indicate failure
 
 @st.cache_resource
 def countTokens(text):
 
     numoftokens = len(encoding.encode(text))
     return numoftokens
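`st.cache_resource` fits `create_chat`, which returns unhashable global resources (model, tokenizer, streamer); for a pure text-to-int function like `countTokens`, Streamlit's `st.cache_data` is the more usual choice. A small sketch of the distinction — the function names here are illustrative, not from the commit:

```python
import streamlit as st

@st.cache_resource            # one shared instance per process: models, connections
def load_chat_pieces():
    return create_chat()      # (tokenizer, ov_model, streamer) as defined above

@st.cache_data                # memoizes picklable return values, keyed by inputs
def count_tokens(text: str) -> int:
    return len(encoding.encode(text))
```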
 
+# AVATARS - using emojis instead of images
+av_us = "👤"   # user avatar emoji
+av_ass = "🤖"  # assistant avatar emoji
 nCTX = 8192
 ### START STREAMLIT UI
+# Create a header element - using markdown instead of an image
+st.header(f"🌟 {model_name} Chatbot")
+st.markdown(f"> *🌟 {model_name} with {nCTX} tokens Context window* - Turn based Chat available with max capacity of :orange[**{st.session_state.maxTurns} messages**].", unsafe_allow_html=True)
+st.markdown(f"#### Powered by OpenVINO")
 
+# CREATE THE SIDEBAR - using markdown and text instead of images
 with st.sidebar:
+    st.subheader("Configuration")  # sidebar header
+    # st.image('images/banner.png', use_column_width=True)  # image removed
+    st.markdown("---")
+    st.markdown("**Model Parameters**")
     st.session_state.temperature = st.slider('Temperature:', min_value=0.0, max_value=1.0, value=0.65, step=0.01)
     st.session_state.maxlength = st.slider('Length reply:', min_value=150, max_value=2000,
                                            value=550, step=50)
     st.session_state.repeat = st.slider('Repeat Penalty:', min_value=0.0, max_value=2.0, value=1.176, step=0.02)
+    st.markdown("---")
+    st.markdown("**Chat Options**")
     st.session_state.turns = st.toggle('Turn based', value=False, help='Activate Conversational Turn Chat with History',
                                        disabled=False, label_visibility="visible")
     st.markdown(f"*Number of Max Turns*: {st.session_state.maxTurns}")
     actualTurns = st.markdown(f"*Chat History Length*: :green[Good]")
     statspeed = st.markdown(f'💫 speed: {st.session_state.speed} t/s')
     btnClear = st.button("Clear History", type="primary", use_container_width=True)
+    st.markdown("---")
+    st.markdown("**Logs**")
     st.markdown(f"**Logfile**: {st.session_state.logfilename}")
 
+tokenizer, ov_model, streamer = create_chat()
+
+if tokenizer and ov_model and streamer:  # only proceed if model loading was successful
+    # Display chat messages from history on app rerun
+    for message in st.session_state.chatMessages:
+        if message["role"] == "user":
+            with st.chat_message(message["role"], avatar=av_us):
+                st.markdown(message["content"])
+        else:
+            with st.chat_message(message["role"], avatar=av_ass):
+                st.markdown(message["content"])
+
+    # Accept user input using text_area and form for more dynamic updates
+    with st.form(key='chat_form', clear_on_submit=False):  # clear_on_submit=False is important: it keeps the form content; the submit button was removed
+        myprompt = st.text_area("What is an AI model?", key="prompt_input", height=100)  # use text_area
+
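A caveat on the form above: Streamlit reports an error for any `st.form` that lacks a `st.form_submit_button`, and widget values inside a form only reach the script on submit, so with the Send button removed `myprompt` never updates. Two common alternatives, sketched with hypothetical labels:

```python
# Option 1: keep the form, but restore the mandatory submit button.
with st.form(key="chat_form", clear_on_submit=True):
    myprompt = st.text_area("Your message", key="prompt_input", height=100)
    submitted = st.form_submit_button("Send")
if submitted and myprompt:
    ...  # handle the prompt

# Option 2: st.chat_input, the purpose-built chat box; it returns None
# until the user sends a message, so no form is needed at all.
if prompt := st.chat_input("What is an AI model?"):
    ...  # handle the prompt
```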
+
+    if myprompt:  # runs when myprompt is set (i.e. when the text_area content changes)
+        # Add user message to chat history
+        st.session_state.messages.append({"role": "user", "content": myprompt})
+        st.session_state.chatMessages.append({"role": "user", "content": myprompt})
+        st.session_state.numOfTurns = len(st.session_state.messages)
+        # Display user message in chat message container
+        with st.chat_message("user", avatar=av_us):
+            st.markdown(myprompt)
+            usertext = f"user: {myprompt}"
+            writehistory(st.session_state.logfilename, usertext)
+        # Display assistant response in chat message container
+        with st.chat_message("assistant", avatar=av_ass):
+            message_placeholder = st.empty()
+            with st.spinner("Thinking..."):
+                start = datetime.datetime.now()
+                response = ''
+                conv_messages = []
+                if st.session_state.turns:
+                    if st.session_state.numOfTurns > st.session_state.maxTurns:
+                        conv_messages = st.session_state.messages[-st.session_state.maxTurns:]
+                        actualTurns.markdown(f"*Chat History Length*: :red[Trimmed]")
+                    else:
+                        conv_messages = st.session_state.messages
                 else:
+                    conv_messages.append(st.session_state.messages[-1])
+
+                full_response = ""
+                model_inputs = tokenizer.apply_chat_template(conv_messages,
+                                                             add_generation_prompt=True,
+                                                             tokenize=True,
+                                                             return_tensors="pt")
+                generate_kwargs = dict(input_ids=model_inputs,
+                                       max_new_tokens=st.session_state.maxlength,
+                                       temperature=st.session_state.temperature,
+                                       do_sample=True,
+                                       top_p=0.5,
+                                       repetition_penalty=st.session_state.repeat,
+                                       streamer=streamer)
+
+                # Run model generation asynchronously (using asyncio)
+                async def generate_response():
+                    global full_response  # assigned below, so it must be declared global
+                    t1 = Thread(target=ov_model.generate, kwargs=generate_kwargs)
+                    t1.start()
+                    start_time = datetime.datetime.now()
+                    partial_text = ""
+                    first_token = 0
+                    for chunk in streamer:
+                        if first_token == 0:
+                            ttft = datetime.datetime.now() - start_time
+                            first_token = 1
+                        for char in chunk:
+                            partial_text += char
+                            message_placeholder.markdown(partial_text + "🟡")
+                            sleep(0.005)  # faster typing effect (reduced to 0.005 s; adjust as needed)
+                        full_response += chunk
+
+                    delta_time = datetime.datetime.now() - start_time
+                    total_seconds = delta_time.total_seconds()
+                    prompt_tokens = len(encoding.encode(myprompt))
+                    assistant_tokens = len(encoding.encode(full_response))
+                    total_tokens = prompt_tokens + assistant_tokens
+                    st.session_state.speed = total_tokens / total_seconds
+                    statspeed.markdown(f'💫 speed: {st.session_state.speed:.2f} t/s')
 
                    delta_time = datetime.datetime.now() - start_time
                    prompt_tokens = len(encoding.encode(myprompt))
                    assistant_tokens = len(encoding.encode(full_response))
+
+                    message_placeholder.markdown(full_response)  # display only the response, without stats
+                    asstext = f"assistant: {full_response}"
+                    writehistory(st.session_state.logfilename, asstext)
+                    st.session_state.messages.append({"role": "assistant", "content": full_response})
+                    st.session_state.chatMessages.append({"role": "assistant", "content": full_response})  # store just the response
+                    st.session_state.numOfTurns = len(st.session_state.messages)
+
+                asyncio.run(generate_response())  # run the async function
+
+    if btnClear:  # when the Clear History button is clicked
+        st.session_state.messages = []
+        st.session_state.chatMessages = []
+        st.session_state.numOfTurns = 0
+        st.rerun()  # rerun the Streamlit app
+else:
+    st.error("Model initialization failed. Please check the logs for details.")
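Taken together, the new file reduces to a load / template / stream loop. A console-only sketch of that loop, without Streamlit, logging, or turn trimming — it assumes the `create_chat` and `encoding` objects defined above, and builds a fresh streamer per turn to avoid reusing one iterator across generations:

```python
import datetime
from threading import Thread
from transformers import TextIteratorStreamer

tokenizer, ov_model, _ = create_chat()
history = []

while True:
    user = input("you> ")
    history.append({"role": "user", "content": user})
    input_ids = tokenizer.apply_chat_template(
        history, add_generation_prompt=True, tokenize=True, return_tensors="pt")
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True,
                                    skip_special_tokens=True)
    Thread(target=ov_model.generate,
           kwargs=dict(input_ids=input_ids, max_new_tokens=500, do_sample=True,
                       temperature=0.65, repetition_penalty=1.176,
                       streamer=streamer)).start()
    start = datetime.datetime.now()
    reply = ""
    for chunk in streamer:                    # decoded text pieces
        print(chunk, end="", flush=True)
        reply += chunk
    secs = (datetime.datetime.now() - start).total_seconds()
    print(f"\n[{len(encoding.encode(reply)) / secs:.2f} t/s]")
    history.append({"role": "assistant", "content": reply})
```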