mike23415 committed
Commit 9f3e630 · verified · 1 Parent(s): bfa6fca

Update app.py

Files changed (1): app.py +2 -40
app.py CHANGED
@@ -56,13 +56,11 @@ def health():
 def chat():
     """Chat endpoint with BitNet streaming response"""
     global model_loaded, model, tokenizer
-
     if not model_loaded:
         return {
             "status": "initializing",
             "message": "Model is still loading. Please try again shortly."
         }, 503
-
     try:
         from transformers import TextIteratorStreamer
         data = request.get_json()
@@ -76,20 +74,16 @@ def chat():
         max_tokens = data.get("max_tokens", 512)
         temperature = data.get("temperature", 0.7)
         top_p = data.get("top_p", 0.95)
-
         messages = [{"role": "system", "content": system_message}]
         for user_msg, bot_msg in history:
             messages.append({"role": "user", "content": user_msg})
             messages.append({"role": "assistant", "content": bot_msg})
         messages.append({"role": "user", "content": message})
-
         prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
         inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
-
         streamer = TextIteratorStreamer(
             tokenizer, skip_prompt=True, skip_special_tokens=True
         )
-
         generate_kwargs = dict(
             **inputs,
             streamer=streamer,
@@ -98,17 +92,13 @@ def chat():
             top_p=top_p,
             do_sample=True,
         )
-
         thread = threading.Thread(target=model.generate, kwargs=generate_kwargs)
         thread.start()
-
         def generate():
             for new_text in streamer:
                 yield f"data: {json.dumps({'response': new_text})}\n\n"
             yield "data: [DONE]\n\n"
-
         return Response(generate(), mimetype="text/event-stream")
-
     except Exception as e:
         print("Error during chat:", e)
         return {"error": str(e)}, 500
@@ -117,24 +107,19 @@ def chat():
 def save_model():
     """Save model and tokenizer to Hugging Face Hub"""
     global model, tokenizer, model_loaded
-
     if not model_loaded:
         return {"error": "Model is still loading. Try again later."}, 503
-
     try:
         # Authenticate with Hugging Face
         token = request.json.get("token")
         if not token:
             return {"error": "Hugging Face token required"}, 400
         login(token=token)
-
         # Define repository
-        repo_id = "priyanshu/playwebit"
+        repo_id = "mike23415/playwebit"
         save_directory = "/tmp/playwebit"
-
         # Create temporary directory
         os.makedirs(save_directory, exist_ok=True)
-
         # Save custom model class (replace with actual implementation)
         custom_model_code = """
 from transformers import PreTrainedModel
@@ -154,15 +139,12 @@ class BitNetForCausalLM(PreTrainedModel):
 """
         with open(os.path.join(save_directory, "custom_bitnet.py"), "w") as f:
             f.write(custom_model_code)
-
         # Save configuration
         model.config.save_pretrained(save_directory)
-
         # Save model and tokenizer
         print("Saving model and tokenizer...")
         model.save_pretrained(save_directory, safe_serialization=True, max_shard_size="5GB")
         tokenizer.save_pretrained(save_directory)
-
         # Update config.json to reference custom class
         import json
         config_path = os.path.join(save_directory, "config.json")
@@ -171,7 +153,6 @@ class BitNetForCausalLM(PreTrainedModel):
         config_json["architectures"] = ["BitNetForCausalLM"]
         with open(config_path, "w") as f:
             json.dump(config_json, f, indent=2)
-
         # Try TensorFlow conversion
         try:
             from transformers import TFAutoModelForCausalLM
@@ -180,23 +161,4 @@ class BitNetForCausalLM(PreTrainedModel):
             tf_model.save_pretrained(save_directory)
             print("TensorFlow weights saved.")
         except Exception as e:
-            print(f"Error converting to TensorFlow: {e}")
-
-        # Upload to Hugging Face Hub
-        api = HfApi()
-        print(f"Uploading to {repo_id}...")
-        api.upload_folder(
-            folder_path=save_directory,
-            repo_id=repo_id,
-            repo_type="model",
-            commit_message="Upload PlayWeBit model, tokenizer, and custom class"
-        )
-
-        return {"message": f"Model uploaded to https://huggingface.co/{repo_id}"}
-
-    except Exception as e:
-        print("Error saving model:", e)
-        return {"error": str(e)}, 500
-
-if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7860)
+            print(f"Error converting to TensorFlow: {e}")
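
On the config rewrite above: pointing architectures at BitNetForCausalLM renames the entry but doesn't tell AutoModelForCausalLM where the class lives. In transformers' custom-code convention that is the job of an auto_map entry referencing the module saved next to the weights. A sketch of that extra step, assuming the same paths as the handler — the auto_map line is the standard file.Class convention, not something this commit adds:

import json
import os

save_directory = "/tmp/playwebit"  # same temp dir used by save_model()
config_path = os.path.join(save_directory, "config.json")

with open(config_path) as f:
    config_json = json.load(f)

config_json["architectures"] = ["BitNetForCausalLM"]
# Assumption: custom_bitnet.py sits next to config.json, so the Auto* loader
# can resolve the class when called with trust_remote_code=True.
config_json["auto_map"] = {"AutoModelForCausalLM": "custom_bitnet.BitNetForCausalLM"}

with open(config_path, "w") as f:
    json.dump(config_json, f, indent=2)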
 