AbstractPhil committed
Commit · 3a8756f
Parent(s): 9dc2118
yes

Files changed:
- app.py +16 -51
- install.sh +73 -14
app.py CHANGED
@@ -1,36 +1,11 @@
 """
 Mirel Harmony Inference - HF Space (Gradio)
-ZeroGPU-ready, Harmony formatting,
-Proper LoRA adapter loading
+ZeroGPU-ready, Harmony formatting, bf16 mode for GPT-OSS-20B
+Proper LoRA adapter loading (MX format not available in stable releases)
 Single file: app.py
-
-Requirements:
-  huggingface_hub>=0.34.0
-  transformers>=4.55.0
-  accelerate>=0.33.0
-  peft>=0.11.0
-  torch>=2.4.0
-  bitsandbytes>=0.43.1
-  openai-harmony
-  gradio>=5.42.0
-  triton>=3.4.0
-  git+https://github.com/triton-lang/triton.git@main#subdirectory=python/triton_kernels
 """
 from __future__ import annotations

-# Import setup to fix Triton if needed
-try:
-    import setup  # This will run install.sh if Triton needs fixing
-except ImportError:
-    print("No setup.py found - checking Triton manually")
-    # Fallback check
-    try:
-        import triton_kernels
-        from triton.tools.ragged_tma import load_ragged
-        print("✓ Triton configured correctly")
-    except ImportError:
-        print("✗ Triton not configured for MX - run install.sh")
-
 # ===== MAIN IMPORTS =====
 import os, gc, json, warnings, traceback
 import subprocess, sys
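For context on the removed import-setup hook: Python runs module-level code at import time, so a top-level setup module can shell out to install.sh before the heavy imports execute. A minimal sketch of what such a setup.py could look like, assuming the probe import and script name referenced in the deleted block (a reconstruction, not the Space's actual file):

    # setup.py - hypothetical reconstruction of the removed import-time hook
    import subprocess

    try:
        # Probe for the MX-capable kernels the old block checked for
        from triton.tools.ragged_tma import load_ragged  # noqa: F401
    except ImportError:
        # Re-run the installer; check=False lets a failed install
        # degrade to the bf16 path instead of crashing the Space
        subprocess.run(["bash", "install.sh"], check=False)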
@@ -77,17 +52,13 @@ except Exception:
     _HAS_PEFT = False
     print("✗ PEFT not available. Install with: pip install peft")

-#
-
-
-
-
-
-
-except ImportError as e:
-    _HAS_TRITON_KERNELS = False
-    print(f"✗ triton_kernels not fully functional: {e}")
-    print("MX format will fall back to bf16 - LoRA may not work correctly")
+# Note: MX format requires unreleased Triton features
+# We'll use bf16 mode which works fine for inference
+_HAS_TRITON_KERNELS = False
+USE_MX_FORMAT = False
+
+print("Note: Using bf16 mode (MX format requires unreleased Triton features)")
+print("This will work fine but use more memory than native MX format")

 # ===== CONFIGURATION =====
 MODEL_ID = os.getenv("MODEL_ID", "openai/gpt-oss-20b")
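This hunk pins _HAS_TRITON_KERNELS and USE_MX_FORMAT to False unconditionally. If the bleeding-edge kernels ever ship in a stable Triton release, a dynamic probe could flip the flags back without another edit; a sketch under that assumption, reusing the imports the deleted code tested:

    # Hypothetical dynamic probe (this commit hardcodes False instead)
    try:
        import triton_kernels  # noqa: F401
        from triton.tools.ragged_tma import load_ragged  # noqa: F401
        _HAS_TRITON_KERNELS = True
        USE_MX_FORMAT = True
    except ImportError:
        _HAS_TRITON_KERNELS = False
        USE_MX_FORMAT = False  # bf16 fallback, as shipped here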
@@ -258,7 +229,7 @@ def load_base_model(device_map: Optional[str] = "auto") -> AutoModelForCausalLM:
         raise

 def load_lora_adapter(model, adapter_id: str, subfolder: Optional[str] = None):
-    """Load and attach LoRA adapter
+    """Load and attach LoRA adapter for bf16 model."""
     if not _HAS_PEFT:
         raise RuntimeError("PEFT is required for LoRA adapters")

@@ -268,9 +239,6 @@ def load_lora_adapter(model, adapter_id: str, subfolder: Optional[str] = None):
     print(f"Subfolder: {subfolder}")
     print(f"{'='*50}\n")

-    # Check if model is using MX format
-    is_mx = detect_mx_format(model) if IS_GPT_OSS else False
-
     # Prepare kwargs for PEFT
     peft_kwargs = {"token": HF_TOKEN, "is_trainable": False}
     if subfolder:
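The dropped is_mx probe becomes dead weight once the model always loads in bf16. detect_mx_format is defined elsewhere in app.py and not shown in this diff; one plausible shape for such a check (an assumption for illustration, not the Space's code) is a scan for packed, non-floating-point weights:

    def detect_mx_format(model) -> bool:
        # Assumption: MX-quantized checkpoints surface as packed integer
        # tensors (e.g. uint8), while bf16 weights are floating point.
        return any(not p.dtype.is_floating_point for p in model.parameters())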
@@ -284,16 +252,13 @@
     # Load the adapter
     model = PeftModel.from_pretrained(model, adapter_id, **peft_kwargs)

-
-
-    print("
-
-
-    for name, param in model.named_parameters():
-        if 'lora_' in name:
-            param.data *= 0.1
+    # Warning about potential mismatch
+    if IS_GPT_OSS:
+        print("⚠ WARNING: LoRA may have been trained on MX format")
+        print("  Model is running in bf16 mode - there may be compatibility issues")
+        print("  If generation quality is poor, the LoRA may need retraining on bf16")

-    print("✓ LoRA adapter loaded
+    print("✓ LoRA adapter loaded")

     # Optionally merge adapter
     if MERGE_ADAPTER and hasattr(model, 'merge_and_unload'):
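With the hand-tuned loop gone (multiplying every lora_ parameter by 0.1 crudely damped the adapter, roughly a tenfold cut to its effective scaling), the adapter path reduces to stock PEFT usage. A minimal end-to-end sketch of that path; the adapter id is a placeholder, and versions follow install.sh:

    import torch
    from transformers import AutoModelForCausalLM
    from peft import PeftModel

    base = AutoModelForCausalLM.from_pretrained(
        "openai/gpt-oss-20b",
        torch_dtype=torch.bfloat16,  # the bf16 fallback this commit settles on
        device_map="auto",
    )
    # "user/mirel-lora" is a placeholder, not the Space's real adapter id
    model = PeftModel.from_pretrained(base, "user/mirel-lora", is_trainable=False)
    model = model.merge_and_unload()  # optional, mirrors MERGE_ADAPTER above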
install.sh CHANGED
@@ -6,10 +6,11 @@ echo "Installing Mirel dependencies for GPT-OSS with MX format support..."
 # Upgrade pip first
 pip install --upgrade pip

-# Install main requirements
+# Install main requirements WITH SPECIFIC VERSIONS for ZeroGPU compatibility
 pip install huggingface_hub>=0.34.0
 pip install transformers>=4.55.0
 pip install accelerate>=0.33.0
+pip install torch==2.4.0  # SPECIFIC VERSION for ZeroGPU - DO NOT use nightly!
 pip install gradio>=5.42.0
 pip install spaces

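The torch==2.4.0 pin follows from the inline warning: ZeroGPU attaches GPUs at runtime through the spaces package, which is validated against specific stable torch builds, so a nightly wheel can break the attach. A small startup guard (an illustration, not part of this commit) makes any drift loud:

    import torch

    PINNED = "2.4.0"  # keep in sync with install.sh
    if not torch.__version__.startswith(PINNED):
        print(f"⚠ torch {torch.__version__} != pinned {PINNED}; "
              f"ZeroGPU may fail to attach a GPU at runtime")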
@@ -20,23 +21,81 @@ pip install bitsandbytes>=0.43.1
 # Install Harmony format
 pip install openai-harmony

-#
-
-echo "Fixing Triton installation for MX format..."
+# Install standard Triton (MX will fallback to bf16)
+pip install triton>=3.0.0

-#
-
+# Note: triton_kernels with ragged_tma is not available in stable releases
+# The model will fall back to bf16 mode which is fine for inference
+echo "Note: MX format requires bleeding-edge Triton features not available in stable releases."
+echo "The model will use bf16 mode instead, which works fine but uses more memory."

-#
-
-pip install
+# Optional but recommended
+pip install safetensors>=0.4.0
+pip install sentencepiece>=0.2.0
+pip install protobuf>=3.20.0
+pip install "numpy<2.0.0"
+
+# Verify critical imports
+echo "Verifying installation..."
+python -c "
+import sys
+errors = []
+
+try:
+    import torch
+    print(f'✓ PyTorch {torch.__version__}')
+    # Check CUDA availability without initializing it (for ZeroGPU)
+    print(f'  CUDA available: Will be checked at runtime')
+except ImportError as e:
+    errors.append(f'✗ PyTorch: {e}')
+
+try:
+    import transformers
+    print(f'✓ Transformers {transformers.__version__}')
+except ImportError as e:
+    errors.append(f'✗ Transformers: {e}')
+
+try:
+    import peft
+    print(f'✓ PEFT {peft.__version__}')
+except ImportError as e:
+    errors.append(f'✗ PEFT: {e}')

-
-
+try:
+    import triton
+    print(f'✓ Triton {triton.__version__}')
+except ImportError as e:
+    errors.append(f'✗ Triton: {e}')
+
+try:
+    import openai_harmony
+    print('✓ OpenAI Harmony')
+except ImportError as e:
+    errors.append(f'✗ OpenAI Harmony: {e}')
+
+try:
+    import gradio
+    print(f'✓ Gradio {gradio.__version__}')
+except ImportError as e:
+    errors.append(f'✗ Gradio: {e}')
+
+try:
+    import spaces
+    print('✓ Spaces (ZeroGPU support)')
+except ImportError as e:
+    errors.append(f'✗ Spaces: {e}')
+
+if errors:
+    print('\n✗ Installation issues found:')
+    for error in errors:
+        print(f'  {error}')
+    sys.exit(1)
+else:
+    print('\n✓ All dependencies installed successfully!')
+    print('Note: Model will run in bf16 mode (MX format requires unreleased Triton features)')
+"

-
-echo "Installing triton_kernels (REQUIRED for MX format)..."
-pip install git+https://github.com/triton-lang/triton.git@main#subdirectory=python/triton_kernels
+echo "Installation complete!"

 # Optional but recommended
 pip install safetensors>=0.4.0
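The echoes above say bf16 "uses more memory" than MX. Rough arithmetic shows the scale of the gap, assuming the published MXFP4 quantization of gpt-oss (about 4 bits per quantized weight, plus per-block scales):

    params = 21e9                  # gpt-oss-20b parameter count, approximate
    bf16_gb = params * 2 / 1e9     # bf16: 2 bytes per parameter -> ~42 GB
    mxfp4_gb = params * 0.5 / 1e9  # MXFP4: ~4 bits per parameter -> ~10.5 GB
    print(f"bf16 ~ {bf16_gb:.0f} GB vs MXFP4 ~ {mxfp4_gb:.0f} GB of weights")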