Update app.py
app.py CHANGED

@@ -56,13 +56,11 @@ def health():
def chat():
    """Chat endpoint with BitNet streaming response"""
    global model_loaded, model, tokenizer
-
    if not model_loaded:
        return {
            "status": "initializing",
            "message": "Model is still loading. Please try again shortly."
        }, 503
-
    try:
        from transformers import TextIteratorStreamer
        data = request.get_json()
@@ -76,20 +74,16 @@ def chat():
        max_tokens = data.get("max_tokens", 512)
        temperature = data.get("temperature", 0.7)
        top_p = data.get("top_p", 0.95)
-
        messages = [{"role": "system", "content": system_message}]
        for user_msg, bot_msg in history:
            messages.append({"role": "user", "content": user_msg})
            messages.append({"role": "assistant", "content": bot_msg})
        messages.append({"role": "user", "content": message})
-
        prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
-
        streamer = TextIteratorStreamer(
            tokenizer, skip_prompt=True, skip_special_tokens=True
        )
-
        generate_kwargs = dict(
            **inputs,
            streamer=streamer,
@@ -98,17 +92,13 @@ def chat():
            top_p=top_p,
            do_sample=True,
        )
-
        thread = threading.Thread(target=model.generate, kwargs=generate_kwargs)
        thread.start()
-
        def generate():
            for new_text in streamer:
                yield f"data: {json.dumps({'response': new_text})}\n\n"
            yield "data: [DONE]\n\n"
-
        return Response(generate(), mimetype="text/event-stream")
-
    except Exception as e:
        print("Error during chat:", e)
        return {"error": str(e)}, 500
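For reference, the handler above emits Server-Sent Events: one "data:" line per generated fragment, terminated by "data: [DONE]". A minimal client sketch follows; the /chat route path and localhost:7860 host are assumptions (the route decorator sits outside the hunks), and message, history, and system_message are presumably read from the same JSON body in the elided lines 69-75.

import json
import requests

# Hypothetical client for the streaming chat endpoint; the /chat path
# and host/port are assumptions. Payload keys mirror what the handler
# reads via data.get(...).
payload = {
    "message": "Hello, BitNet!",
    "history": [],  # list of [user_msg, bot_msg] pairs
    "system_message": "You are a helpful assistant.",
    "max_tokens": 512,
    "temperature": 0.7,
    "top_p": 0.95,
}

with requests.post("http://localhost:7860/chat", json=payload, stream=True) as resp:
    for line in resp.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue  # skip the blank lines that separate SSE events
        chunk = line[len("data: "):]
        if chunk == "[DONE]":
            break
        print(json.loads(chunk)["response"], end="", flush=True)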
@@ -117,24 +107,19 @@ def chat():
def save_model():
    """Save model and tokenizer to Hugging Face Hub"""
    global model, tokenizer, model_loaded
-
    if not model_loaded:
        return {"error": "Model is still loading. Try again later."}, 503
-
    try:
        # Authenticate with Hugging Face
        token = request.json.get("token")
        if not token:
            return {"error": "Hugging Face token required"}, 400
        login(token=token)
-
        # Define repository
-        repo_id = "
+        repo_id = "mike23415/playwebit"
        save_directory = "/tmp/playwebit"
-
        # Create temporary directory
        os.makedirs(save_directory, exist_ok=True)
-
        # Save custom model class (replace with actual implementation)
        custom_model_code = """
from transformers import PreTrainedModel
@@ -154,15 +139,12 @@ class BitNetForCausalLM(PreTrainedModel):
"""
        with open(os.path.join(save_directory, "custom_bitnet.py"), "w") as f:
            f.write(custom_model_code)
-
        # Save configuration
        model.config.save_pretrained(save_directory)
-
        # Save model and tokenizer
        print("Saving model and tokenizer...")
        model.save_pretrained(save_directory, safe_serialization=True, max_shard_size="5GB")
        tokenizer.save_pretrained(save_directory)
-
        # Update config.json to reference custom class
        import json
        config_path = os.path.join(save_directory, "config.json")
@@ -171,7 +153,6 @@ class BitNetForCausalLM(PreTrainedModel):
        config_json["architectures"] = ["BitNetForCausalLM"]
        with open(config_path, "w") as f:
            json.dump(config_json, f, indent=2)
-
        # Try TensorFlow conversion
        try:
            from transformers import TFAutoModelForCausalLM
@@ -180,23 +161,4 @@ class BitNetForCausalLM(PreTrainedModel):
            tf_model.save_pretrained(save_directory)
            print("TensorFlow weights saved.")
        except Exception as e:
-            print(f"Error converting to TensorFlow: {e}")
-
-        # Upload to Hugging Face Hub
-        api = HfApi()
-        print(f"Uploading to {repo_id}...")
-        api.upload_folder(
-            folder_path=save_directory,
-            repo_id=repo_id,
-            repo_type="model",
-            commit_message="Upload PlayWeBit model, tokenizer, and custom class"
-        )
-
-        return {"message": f"Model uploaded to https://huggingface.co/{repo_id}"}
-
-    except Exception as e:
-        print("Error saving model:", e)
-        return {"error": str(e)}, 500
-
-if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=7860)
+            print(f"Error converting to TensorFlow: {e}")
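Exercising the save endpoint is a single POST carrying the Hugging Face token the handler requires. A sketch, assuming the handler is mounted at /save_model on the same host/port (the route decorator is outside the shown hunks):

import requests

# Hypothetical call to the save endpoint; the /save_model path is an
# assumption. The handler requires a Hugging Face token in the JSON body
# and returns 503 while the model is still loading.
resp = requests.post(
    "http://localhost:7860/save_model",
    json={"token": "hf_..."},  # your Hugging Face access token
)
print(resp.status_code, resp.json())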
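Reloading the saved folder (or the mike23415/playwebit repo, once uploaded) touches transformers' custom-code machinery: config.json names BitNetForCausalLM, whose definition lives in custom_bitnet.py rather than in the library. A minimal sketch, under the assumption that an auto_map entry is written next to architectures, which the handler above does not yet do:

from transformers import AutoModelForCausalLM, AutoTokenizer

# Hypothetical reload. For AutoModelForCausalLM to resolve the custom
# class, config.json would also need an auto_map entry such as
# {"AutoModelForCausalLM": "custom_bitnet.BitNetForCausalLM"};
# the handler above only sets "architectures".
repo_id = "mike23415/playwebit"
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(
    repo_id,
    trust_remote_code=True,  # allow executing custom_bitnet.py from the repo
)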