mike23415 committed
Commit e767f7f · verified · 1 Parent(s): 75d5062

Delete app.py

Files changed (1): app.py +0 -164
app.py DELETED
@@ -1,164 +0,0 @@
- import json
- import threading
- import torch
- import os
- from flask import Flask, request, Response, jsonify
- from flask_cors import CORS
- from huggingface_hub import HfApi, login
-
- app = Flask(__name__)
- CORS(app)
-
- # Global state
- tokenizer = None
- model = None
- model_loading = False
- model_loaded = False
- model_id = "microsoft/bitnet-b1.58-2B-4T"
-
- # Load model in background
- def load_model_thread():
-     global tokenizer, model, model_loaded, model_loading
-     try:
-         model_loading = True
-         from transformers import AutoTokenizer, AutoModelForCausalLM
-         print("Loading tokenizer...")
-         tokenizer = AutoTokenizer.from_pretrained(model_id)
-         print("Loading model...")
-         model = AutoModelForCausalLM.from_pretrained(
-             model_id,
-             torch_dtype=torch.float32,
-             device_map=None
-         ).to("cpu")
-         model_loaded = True
-         print("✅ Model loaded successfully.")
-     except Exception as e:
-         print(f"❌ Error loading model: {e}")
-     finally:
-         model_loading = False
-
- # Start background model load
- threading.Thread(target=load_model_thread, daemon=True).start()
-
- @app.route("/")
- def home():
-     return "🚀 Flask backend for BitNet is running!"
-
- @app.route("/api/health", methods=["GET"])
- def health():
-     """Health check endpoint"""
-     return {
-         "status": "ok",
-         "model_loaded": model_loaded,
-         "model_loading": model_loading
-     }
-
- @app.route("/api/chat", methods=["POST"])
- def chat():
-     """Chat endpoint with BitNet streaming response"""
-     global model_loaded, model, tokenizer
-     if not model_loaded:
-         return {
-             "status": "initializing",
-             "message": "Model is still loading. Please try again shortly."
-         }, 503
-     try:
-         from transformers import TextIteratorStreamer
-         data = request.get_json()
-         message = data.get("message", "")
-         history = data.get("history", [])
-         system_message = data.get("system_message", (
-             "You are a helpful assistant. When generating code, always wrap it in markdown code blocks (```) "
-             "with the appropriate language identifier (e.g., ```python, ```javascript). "
-             "Ensure proper indentation and line breaks for readability."
-         ))
-         max_tokens = data.get("max_tokens", 512)
-         temperature = data.get("temperature", 0.7)
-         top_p = data.get("top_p", 0.95)
-         messages = [{"role": "system", "content": system_message}]
-         for user_msg, bot_msg in history:
-             messages.append({"role": "user", "content": user_msg})
-             messages.append({"role": "assistant", "content": bot_msg})
-         messages.append({"role": "user", "content": message})
-         prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-         inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
-         streamer = TextIteratorStreamer(
-             tokenizer, skip_prompt=True, skip_special_tokens=True
-         )
-         generate_kwargs = dict(
-             **inputs,
-             streamer=streamer,
-             max_new_tokens=max_tokens,
-             temperature=temperature,
-             top_p=top_p,
-             do_sample=True,
-         )
-         thread = threading.Thread(target=model.generate, kwargs=generate_kwargs)
-         thread.start()
-         def generate():
-             for new_text in streamer:
-                 yield f"data: {json.dumps({'response': new_text})}\n\n"
-             yield "data: [DONE]\n\n"
-         return Response(generate(), mimetype="text/event-stream")
-     except Exception as e:
-         print("Error during chat:", e)
-         return {"error": str(e)}, 500
-
- @app.route("/api/save_model", methods=["POST"])
- def save_model():
-     """Save model and tokenizer to Hugging Face Hub"""
-     global model, tokenizer, model_loaded
-     if not model_loaded:
-         return {"error": "Model is still loading. Try again later."}, 503
-     try:
-         # Authenticate with Hugging Face
-         token = request.json.get("token")
-         if not token:
-             return {"error": "Hugging Face token required"}, 400
-         login(token=token)
-         # Define repository
-         repo_id = "mike23415/playwebit"
-         save_directory = "/tmp/playwebit"
-         # Create temporary directory
-         os.makedirs(save_directory, exist_ok=True)
-         # Save custom model class (replace with actual implementation)
-         custom_model_code = """
- from transformers import PreTrainedModel
- from transformers.models.bitnet.configuration_bitnet import BitNetConfig
-
- class BitNetForCausalLM(PreTrainedModel):
-     config_class = BitNetConfig
-
-     def __init__(self, config):
-         super().__init__(config)
-         # Placeholder: Copy implementation from fork's modeling_bitnet.py
-         raise NotImplementedError("Replace with actual BitNetForCausalLM implementation")
-
-     def forward(self, *args, **kwargs):
-         # Placeholder: Copy forward pass from fork
-         raise NotImplementedError("Replace with actual forward pass implementation")
- """
-         with open(os.path.join(save_directory, "custom_bitnet.py"), "w") as f:
-             f.write(custom_model_code)
-         # Save configuration
-         model.config.save_pretrained(save_directory)
-         # Save model and tokenizer
-         print("Saving model and tokenizer...")
-         model.save_pretrained(save_directory, safe_serialization=True, max_shard_size="5GB")
-         tokenizer.save_pretrained(save_directory)
-         # Update config.json to reference custom class
-         config_path = os.path.join(save_directory, "config.json")
-         with open(config_path, "r") as f:
-             config_json = json.load(f)
-         config_json["architectures"] = ["BitNetForCausalLM"]
-         with open(config_path, "w") as f:
-             json.dump(config_json, f, indent=2)
-         # Try TensorFlow conversion
-         try:
-             from transformers import TFAutoModelForCausalLM
-             print("Converting to TensorFlow weights...")
-             tf_model = TFAutoModelForCausalLM.from_pretrained(save_directory, from_pt=True)
-             tf_model.save_pretrained(save_directory)
-             print("TensorFlow weights saved.")
-         except Exception as e:
-             print(f"Error converting to TensorFlow: {e}")
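The hunk ends here, mid-function: the outer try in save_model never gets a matching except, nothing is actually pushed to the Hub despite the docstring, and the server is never started, so the file was not runnable as committed. A minimal sketch of how the missing tail might look, assuming the repo_id defined above and huggingface_hub's HfApi.create_repo/upload_folder (this completion is an assumption, not part of the commit):

        # Assumed completion (not in the commit): push the saved files to the
        # Hub under repo_id, then report success to the client.
        api = HfApi()
        api.create_repo(repo_id=repo_id, exist_ok=True)  # no-op if the repo already exists
        api.upload_folder(folder_path=save_directory, repo_id=repo_id)
        return {"status": "saved", "repo_id": repo_id}
    except Exception as e:
        print("Error during save_model:", e)
        return {"error": str(e)}, 500

if __name__ == "__main__":
    # 7860 is the conventional port for a Hugging Face Space (an assumption here).
    app.run(host="0.0.0.0", port=7860)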
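For reference, /api/chat replies with a server-sent-event stream, so a client has to read the response incrementally rather than as one JSON body. A sketch of one way to consume it with requests (the host and port are assumptions, not taken from the file):

import json
import requests

# Hypothetical client; adjust the URL to wherever the Flask app is served.
resp = requests.post(
    "http://localhost:7860/api/chat",
    json={"message": "Hello!", "history": [], "max_tokens": 128},
    stream=True,
)
for line in resp.iter_lines(decode_unicode=True):
    # Each SSE event is a single "data: ..." line; blank lines separate events.
    if not line or not line.startswith("data: "):
        continue
    payload = line[len("data: "):]
    if payload == "[DONE]":
        break
    print(json.loads(payload)["response"], end="", flush=True)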