kimhyunwoo committed
Commit c9ceb74 Β· verified Β· 1 Parent: dc10a73

Update app.py

Files changed (1): app.py (+138 -51)

app.py CHANGED
@@ -4,11 +4,14 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import gc
 import os
 import datetime
+import time

+# --- Configuration ---
 MODEL_ID = "naver-hyperclovax/HyperCLOVAX-SEED-Text-Instruct-0.5B"
 MAX_NEW_TOKENS = 512
 CPU_THREAD_COUNT = 4  # adjust if needed

+# --- Optional: Set CPU Threads ---
 # torch.set_num_threads(CPU_THREAD_COUNT)
 # os.environ["OMP_NUM_THREADS"] = str(CPU_THREAD_COUNT)
 # os.environ["MKL_NUM_THREADS"] = str(CPU_THREAD_COUNT)
@@ -18,60 +21,62 @@ print(f"PyTorch version: {torch.__version__}")
 print(f"Running on device: cpu")
 print(f"Torch Threads: {torch.get_num_threads()}")

+# --- Model and Tokenizer Loading ---
 print(f"--- Loading Model: {MODEL_ID} ---")
 print("This might take a few minutes, especially on the first launch...")

 model = None
 tokenizer = None
+load_successful = False

 try:
+    start_load_time = time.time()
     model = AutoModelForCausalLM.from_pretrained(
         MODEL_ID,
         torch_dtype=torch.float32,
         device_map="cpu",
-        force_download=True  # kept to work around an earlier error (set to False or remove if not needed)
+        # force_download=True  # commented out; not needed unless there is a cache problem
     )
     tokenizer = AutoTokenizer.from_pretrained(
         MODEL_ID,
-        force_download=True  # kept to work around an earlier error (set to False or remove if not needed)
+        # force_download=True  # commented out
     )
     model.eval()
-    print("--- Model and Tokenizer Loaded Successfully on CPU ---")
+    load_time = time.time() - start_load_time
+    print(f"--- Model and Tokenizer Loaded Successfully on CPU in {load_time:.2f} seconds ---")
+    load_successful = True

+    # --- Stop Token Configuration ---
     stop_token_strings = ["<|endofturn|>", "<|stop|>"]
     stop_token_ids_list = [tokenizer.convert_tokens_to_ids(token) for token in stop_token_strings]

-    # Check that the tokenizer has a properly set eos_token_id
-    if tokenizer.eos_token is not None and tokenizer.eos_token_id not in stop_token_ids_list:
-        # Add only when eos_token_id is not None and not already in the list
-        if tokenizer.eos_token_id is not None:
-            stop_token_ids_list.append(tokenizer.eos_token_id)
-        else:
-            print("Warning: tokenizer.eos_token_id is None. Cannot add to stop tokens.")
-    elif tokenizer.eos_token is None:
-        print("Warning: tokenizer.eos_token is not defined.")
-
+    if tokenizer.eos_token_id is not None and tokenizer.eos_token_id not in stop_token_ids_list:
+        stop_token_ids_list.append(tokenizer.eos_token_id)
+    elif tokenizer.eos_token_id is None:
+        print("Warning: tokenizer.eos_token_id is None. Cannot add to stop tokens.")

     stop_token_ids_list = [tid for tid in stop_token_ids_list if tid is not None]

     if not stop_token_ids_list:
-        print("Warning: Could not find any stop token IDs. Generation might not stop correctly.")
-        # Fallback: if there is no EOS token ID either, generation may misbehave
-        # If needed, hard-code a default EOS token ID or handle it another way
-        # e.g. stop_token_ids_list = [some_default_eos_id]
+        print("Warning: Could not find any stop token IDs. Using default EOS if available, otherwise generation might not stop correctly.")
+        if tokenizer.eos_token_id is not None:
+            stop_token_ids_list = [tokenizer.eos_token_id]
+        else:
+            print("Error: No stop tokens found, including default EOS. Generation may run indefinitely.")
+            # Handle the error or set a default value here if needed

     print(f"Using Stop Token IDs: {stop_token_ids_list}")

 except Exception as e:
     print(f"!!! Error loading model: {e}")
-    if 'model' in locals() and model is not None:
-        del model
-    if 'tokenizer' in locals() and tokenizer is not None:
-        del tokenizer
+    if 'model' in locals() and model is not None: del model
+    if 'tokenizer' in locals() and tokenizer is not None: del tokenizer
     gc.collect()
-    raise gr.Error(f"Failed to load the model {MODEL_ID}. Please check the Space logs. Error: {e}")
+    # If loading fails before the app runs, consider terminating the process or handling it differently instead of a Gradio error
+    raise gr.Error(f"Failed to load the model {MODEL_ID}. Cannot start the application. Error: {e}")


+# --- System Prompt Definition ---
 def get_system_prompt():
     current_date = datetime.datetime.now().strftime("%Y-%m-%d (%A)")
     return (
@@ -80,72 +85,143 @@ def get_system_prompt():
         f"- μ‚¬μš©μžμ˜ μ§ˆλ¬Έμ— λŒ€ν•΄ μΉœμ ˆν•˜κ³  μžμ„Έν•˜κ²Œ ν•œκ΅­μ–΄λ‘œ λ‹΅λ³€ν•΄μ•Ό ν•œλ‹€."
     )

+# --- Warm-up Function ---
+def warmup_model():
+    if not load_successful or model is None or tokenizer is None:
+        print("Skipping warmup: Model not loaded successfully.")
+        return
+
+    print("--- Starting Model Warm-up ---")
+    try:
+        start_warmup_time = time.time()
+        warmup_message = "μ•ˆλ…•ν•˜μ„Έμš”"
+        system_prompt = get_system_prompt()
+        warmup_chat = [
+            {"role": "tool_list", "content": ""},
+            {"role": "system", "content": system_prompt},
+            {"role": "user", "content": warmup_message}
+        ]
+
+        inputs = tokenizer.apply_chat_template(
+            warmup_chat,
+            add_generation_prompt=True,
+            return_dict=True,
+            return_tensors="pt"
+        ).to("cpu")
+
+        with torch.no_grad():
+            output_ids = model.generate(
+                **inputs,
+                max_new_tokens=10,  # generate only a few tokens to save time
+                eos_token_id=stop_token_ids_list,
+                pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id is not None else tokenizer.pad_token_id,
+                do_sample=False  # no sampling needed during warm-up
+            )
+
+        # Decode the result (optional, for verification)
+        # response = tokenizer.decode(output_ids[0, inputs['input_ids'].shape[1]:], skip_special_tokens=True)
+        # print(f"Warm-up response (decoded): {response}")
+
+        del inputs
+        del output_ids
+        gc.collect()
+        warmup_time = time.time() - start_warmup_time
+        print(f"--- Model Warm-up Completed in {warmup_time:.2f} seconds ---")
+
+    except Exception as e:
+        print(f"!!! Error during model warm-up: {e}")
+        # Make sure a warm-up failure does not block the app from running
+    finally:
+        gc.collect()  # Ensure cleanup even if warmup fails
+
+
+# --- Inference Function ---
 def predict(message, history):
+    """
+    Generates a response with HyperCLOVA X based on the user message and chat history.
+    Handles chat formatting, generation, decoding, and memory management.
+    Assumes 'history' is in the Gradio 'messages' format: a list of dicts with 'role' and 'content' keys.
+    """
+    if model is None or tokenizer is None:
+        return "였λ₯˜: λͺ¨λΈμ΄ λ‘œλ“œλ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€."
+
     system_prompt = get_system_prompt()

+    # Assume history is a List[Dict] ('messages' format)
     chat_history_formatted = [
         {"role": "tool_list", "content": ""},
         {"role": "system", "content": system_prompt}
     ]
-    # Assume history is a list of (user, ai) tuples
-    for user_msg, ai_msg in history:
-        chat_history_formatted.append({"role": "user", "content": user_msg})
-        chat_history_formatted.append({"role": "assistant", "content": ai_msg})
+    # history looks like [{'role': 'user', 'content': '...'}, {'role': 'assistant', 'content': '...'}]
+    for turn in history:
+        # Check that each history item is a dict (safer)
+        if isinstance(turn, dict) and "role" in turn and "content" in turn:
+            chat_history_formatted.append(turn)
+        else:
+            # Warn when an unexpected format comes in (for debugging)
+            print(f"Warning: Unexpected history format item: {turn}")
+            # Add error handling or conversion logic here if needed
+

+    # Add the latest user message
     chat_history_formatted.append({"role": "user", "content": message})

     inputs = None
     output_ids = None

     try:
-        # The model was loaded with device_map="cpu", so send the inputs to the CPU as well.
         inputs = tokenizer.apply_chat_template(
             chat_history_formatted,
             add_generation_prompt=True,
             return_dict=True,
             return_tensors="pt"
-        ).to("cpu")  # explicitly place on CPU
+        ).to("cpu")  # Explicitly send to CPU
         input_length = inputs['input_ids'].shape[1]
         print(f"\nInput tokens: {input_length}")

     except Exception as e:
         print(f"!!! Error applying chat template: {e}")
+        # Provide feedback to the user
         return f"였λ₯˜: μž…λ ₯ ν˜•μ‹μ„ μ²˜λ¦¬ν•˜λŠ” 쀑 λ¬Έμ œκ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€. ({e})"

     try:
         print("Generating response...")
+        generation_start_time = time.time()
         with torch.no_grad():
-            # Passing a list to eos_token_id is the usual approach.
             output_ids = model.generate(
                 **inputs,
                 max_new_tokens=MAX_NEW_TOKENS,
-                eos_token_id=stop_token_ids_list,  # use the corrected stop_token_ids_list
-                pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id is not None else tokenizer.pad_token_id,  # check pad_token_id
+                eos_token_id=stop_token_ids_list,
+                pad_token_id=tokenizer.eos_token_id if tokenizer.eos_token_id is not None else tokenizer.pad_token_id,
                 do_sample=True,
                 temperature=0.7,
                 top_p=0.9,
             )
-        print("Generation complete.")
+        generation_time = time.time() - generation_start_time
+        print(f"Generation complete in {generation_time:.2f} seconds.")

     except Exception as e:
         print(f"!!! Error during model generation: {e}")
+        # Clean up potentially large tensors in case of error
         if inputs is not None: del inputs
         if output_ids is not None: del output_ids
         gc.collect()
         return f"였λ₯˜: 응닡을 μƒμ„±ν•˜λŠ” 쀑 λ¬Έμ œκ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€. ({e})"

-    # Attempt decoding only when output_ids is not None
+    # Decode the response
+    response = "였λ₯˜: 응닡 생성에 μ‹€νŒ¨ν–ˆμŠ΅λ‹ˆλ‹€."  # default value
     if output_ids is not None:
-        new_tokens = output_ids[0, input_length:]
-        response = tokenizer.decode(new_tokens, skip_special_tokens=True)
-        print(f"Output tokens: {len(new_tokens)}")
-        del new_tokens  # free memory
-    else:
-        response = "였λ₯˜: 응닡 생성에 μ‹€νŒ¨ν–ˆμŠ΅λ‹ˆλ‹€ (output_ids is None)."
-        print("Generation failed, output_ids is None.")
+        try:
+            new_tokens = output_ids[0, input_length:]
+            response = tokenizer.decode(new_tokens, skip_special_tokens=True)
+            print(f"Output tokens: {len(new_tokens)}")
+            del new_tokens
+        except Exception as e:
+            print(f"!!! Error decoding response: {e}")
+            response = "였λ₯˜: 응닡을 λ””μ½”λ”©ν•˜λŠ” 쀑 λ¬Έμ œκ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€."


-    # free memory
+    # Clean up memory
     if inputs is not None: del inputs
     if output_ids is not None: del output_ids
     gc.collect()
@@ -153,15 +229,15 @@ def predict(message, history):

     return response

-# --- Gradio Interface ---
+# --- Gradio Interface Setup ---
 print("--- Setting up Gradio Interface ---")

-# Resolve the UserWarning and use the newer format
+# Specify type='messages' to resolve the UserWarning and use the newer format
 chatbot_component = gr.Chatbot(
     label="HyperCLOVA X SEED (0.5B) λŒ€ν™”",
     bubble_full_width=False,
     height=600,
-    type='messages'  # explicitly use the message format
+    type='messages'  # set explicitly to ensure compatibility with ChatInterface
 )

 examples = [
@@ -171,23 +247,34 @@ examples = [
     ["μ œμ£Όλ„ μ—¬ν–‰ κ³„νšμ„ μ„Έμš°κ³  μžˆλŠ”λ°, 3λ°• 4일 μΆ”μ²œ μ½”μŠ€ μ’€ μ§œμ€„λž˜?"],
 ]

-# Removed the problematic arguments (retry_btn, undo_btn, clear_btn)
+# Unnecessary arguments removed when creating ChatInterface
 demo = gr.ChatInterface(
-    fn=predict,
-    chatbot=chatbot_component,
+    fn=predict,  # connect the prediction function
+    chatbot=chatbot_component,  # use the Chatbot component (type='messages' is set)
     title="πŸ‡°πŸ‡· 넀이버 HyperCLOVA X SEED (0.5B) 데λͺ¨",
     description=(
         f"**λͺ¨λΈ:** {MODEL_ID}\n"
         f"**ν™˜κ²½:** Hugging Face 무료 CPU (16GB RAM)\n"
-        f"**주의:** CPUμ—μ„œ μ‹€ν–‰λ˜λ―€λ‘œ 응닡 생성에 λ‹€μ†Œ μ‹œκ°„μ΄ 걸릴 수 μžˆμŠ΅λ‹ˆλ‹€ (특히 첫 응닡). "
+        f"**주의:** CPUμ—μ„œ μ‹€ν–‰λ˜λ―€λ‘œ 응닡 생성에 λ‹€μ†Œ μ‹œκ°„μ΄ 걸릴 수 μžˆμŠ΅λ‹ˆλ‹€. (μ›œμ—… μ‹œλ„λ¨)\n"
         f"μ΅œλŒ€ 생성 토큰 μˆ˜λŠ” {MAX_NEW_TOKENS}개둜 μ œν•œλ©λ‹ˆλ‹€."
     ),
     examples=examples,
-    cache_examples=False,
+    cache_examples=False,  # disable example caching on the free tier
     theme="soft",
+    # retry_btn, undo_btn, clear_btn, etc. are not directly supported in recent versions
 )

-# --- Launch the App ---
+# --- Application Launch ---
 if __name__ == "__main__":
+    # Run warm-up only when the model loaded successfully
+    if load_successful:
+        warmup_model()
+    else:
+        print("Skipping warm-up because model loading failed.")
+
     print("--- Launching Gradio App ---")
-    demo.queue().launch()
+    # queue() is useful for handling multiple users and managing long tasks
+    demo.queue().launch(
+        # share=True  # needed to create a public link (may require login)
+        # server_name="0.0.0.0"  # to allow access from the local network
+    )
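
A minimal sketch of how the updated predict function can be exercised outside Gradio, assuming it runs in the same process as app.py (so model, tokenizer, and predict are already defined); the history entries mirror the 'messages'-format dicts that gr.Chatbot(type='messages') passes in, and the sample values are illustrative only:

# Illustrative only: call predict() with a 'messages'-format history,
# mirroring what gr.ChatInterface supplies when the chatbot uses type='messages'.
sample_history = [
    {"role": "user", "content": "μ•ˆλ…•ν•˜μ„Έμš”"},
    {"role": "assistant", "content": "μ•ˆλ…•ν•˜μ„Έμš”! 무엇을 λ„μ™€λ“œλ¦΄κΉŒμš”?"},
]

print(predict("μ œμ£Όλ„ 3λ°• 4일 μ—¬ν–‰ μ½”μŠ€λ₯Ό μΆ”μ²œν•΄ μ€˜", sample_history))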