AbstractPhil committed
Commit 3a8756f · 1 Parent(s): 9dc2118
Files changed (2)
  1. app.py +16 -51
  2. install.sh +73 -14
app.py CHANGED
@@ -1,36 +1,11 @@
 """
 Mirel Harmony Inference – HF Space (Gradio)
-ZeroGPU-ready, Harmony formatting, MX format support for GPT-OSS-20B
-Proper LoRA adapter loading and conversion for MX compatibility
+ZeroGPU-ready, Harmony formatting, bf16 mode for GPT-OSS-20B
+Proper LoRA adapter loading (MX format not available in stable releases)
 Single file: app.py
-
-Requirements:
-    huggingface_hub>=0.34.0
-    transformers>=4.55.0
-    accelerate>=0.33.0
-    peft>=0.11.0
-    torch>=2.4.0
-    bitsandbytes>=0.43.1
-    openai-harmony
-    gradio>=5.42.0
-    triton>=3.4.0
-    git+https://github.com/triton-lang/triton.git@main#subdirectory=python/triton_kernels
 """
 from __future__ import annotations
 
-# Import setup to fix Triton if needed
-try:
-    import setup  # This will run install.sh if Triton needs fixing
-except ImportError:
-    print("No setup.py found - checking Triton manually")
-    # Fallback check
-    try:
-        import triton_kernels
-        from triton.tools.ragged_tma import load_ragged
-        print("✓ Triton configured correctly")
-    except ImportError:
-        print("⚠ Triton not configured for MX - run install.sh")
-
 # ===== MAIN IMPORTS =====
 import os, gc, json, warnings, traceback
 import subprocess, sys
@@ -77,17 +52,13 @@ except Exception:
     _HAS_PEFT = False
     print("⚠ PEFT not available. Install with: pip install peft")
 
-# Check for triton_kernels after setup
-try:
-    import triton_kernels
-    # Also check for the specific module that was missing
-    from triton.tools.ragged_tma import load_ragged, store_ragged
-    _HAS_TRITON_KERNELS = True
-    print("✓ triton_kernels loaded with ragged_tma support - MX format enabled")
-except ImportError as e:
-    _HAS_TRITON_KERNELS = False
-    print(f"✗ triton_kernels not fully functional: {e}")
-    print("  MX format will fall back to bf16 - LoRA may not work correctly")
+# Note: MX format requires unreleased Triton features
+# We'll use bf16 mode, which works fine for inference
+_HAS_TRITON_KERNELS = False
+USE_MX_FORMAT = False
+
+print("Note: Using bf16 mode (MX format requires unreleased Triton features)")
+print("This will work fine but use more memory than native MX format")
 
 # ===== CONFIGURATION =====
 MODEL_ID = os.getenv("MODEL_ID", "openai/gpt-oss-20b")
@@ -258,7 +229,7 @@ def load_base_model(device_map: Optional[str] = "auto") -> AutoModelForCausalLM:
         raise
 
 def load_lora_adapter(model, adapter_id: str, subfolder: Optional[str] = None):
-    """Load and attach LoRA adapter with MX format handling."""
+    """Load and attach LoRA adapter for bf16 model."""
     if not _HAS_PEFT:
         raise RuntimeError("PEFT is required for LoRA adapters")
 
@@ -268,9 +239,6 @@ def load_lora_adapter(model, adapter_id: str, subfolder: Optional[str] = None):
     print(f"Subfolder: {subfolder}")
     print(f"{'='*50}\n")
 
-    # Check if model is using MX format
-    is_mx = detect_mx_format(model) if IS_GPT_OSS else False
-
     # Prepare kwargs for PEFT
     peft_kwargs = {"token": HF_TOKEN, "is_trainable": False}
     if subfolder:
@@ -284,16 +252,13 @@ def load_lora_adapter(model, adapter_id: str, subfolder: Optional[str] = None):
     # Load the adapter
     model = PeftModel.from_pretrained(model, adapter_id, **peft_kwargs)
 
-    if not is_mx and IS_GPT_OSS:
-        print("⚠ WARNING: Model is bf16 but LoRA was likely trained on MX format")
-        print("  Reducing LoRA influence to 10% to prevent corruption")
-
-        # Scale down LoRA weights
-        for name, param in model.named_parameters():
-            if 'lora_' in name:
-                param.data *= 0.1
+    # Warning about potential mismatch
+    if IS_GPT_OSS:
+        print("⚠ WARNING: LoRA may have been trained on MX format")
+        print("  Model is running in bf16 mode - there may be compatibility issues")
+        print("  If generation quality is poor, the LoRA may need retraining on bf16")
 
-    print("✓ LoRA adapter loaded successfully")
+    print("✓ LoRA adapter loaded")
 
     # Optionally merge adapter
     if MERGE_ADAPTER and hasattr(model, 'merge_and_unload'):
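
Review note on the deleted down-scaling loop: multiplying every parameter whose name contains 'lora_' by 0.1 scales both lora_A and lora_B, so the effective update (B @ A) shrinks by a factor of 0.01, not the 10% the removed print claimed; scaling only the lora_B weights would have matched the stated intent.

If the capability probe is ever wanted back, a minimal runtime-guard sketch (assuming only the import names used by the deleted block; as the commit notes, triton.tools.ragged_tma is absent from stable Triton, so on current wheels this simply leaves the flag False):

# Hedged sketch, not part of the commit: probe for the MX kernel path at
# runtime instead of hard-coding USE_MX_FORMAT = False.
def mx_kernels_available() -> bool:
    try:
        import triton_kernels  # noqa: F401
        from triton.tools.ragged_tma import load_ragged, store_ragged  # noqa: F401
        return True
    except ImportError:
        return False

USE_MX_FORMAT = mx_kernels_available()
print(f"MX kernels: {'available' if USE_MX_FORMAT else 'unavailable - using bf16'}")

This keeps the commit's bf16 default on stable installs while re-enabling MX automatically if a future Triton release ships the kernels.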
install.sh CHANGED
@@ -6,10 +6,11 @@ echo "Installing Mirel dependencies for GPT-OSS with MX format support..."
 # Upgrade pip first
 pip install --upgrade pip
 
-# Install main requirements
+# Install main requirements WITH SPECIFIC VERSIONS for ZeroGPU compatibility
 pip install huggingface_hub>=0.34.0
 pip install transformers>=4.55.0
 pip install accelerate>=0.33.0
+pip install torch==2.4.0  # SPECIFIC VERSION for ZeroGPU - DO NOT use nightly!
 pip install gradio>=5.42.0
 pip install spaces
 
@@ -20,23 +21,81 @@ pip install bitsandbytes>=0.43.1
 # Install Harmony format
 pip install openai-harmony
 
-# FIX TRITON FOR MX FORMAT
-# The standard triton doesn't have ragged_tma module needed for MX
-echo "Fixing Triton installation for MX format..."
-
-# Clean existing triton installations
-pip uninstall -y triton triton_kernels 2>/dev/null || true
-
-# Install PyTorch nightly (includes compatible Triton)
-echo "Installing PyTorch nightly with compatible Triton..."
-pip install --upgrade --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu121
-
-# Install Triton from PyTorch nightly
-pip install --upgrade --index-url https://download.pytorch.org/whl/nightly/cu121 triton
-
-# Install triton_kernels from source
-echo "Installing triton_kernels (REQUIRED for MX format)..."
-pip install git+https://github.com/triton-lang/triton.git@main#subdirectory=python/triton_kernels
-
+# Install standard Triton (MX will fall back to bf16)
+pip install triton>=3.0.0
+
+# Note: triton_kernels with ragged_tma is not available in stable releases
+# The model will fall back to bf16 mode, which is fine for inference
+echo "Note: MX format requires bleeding-edge Triton features not available in stable releases."
+echo "The model will use bf16 mode instead, which works fine but uses more memory."
+
+# Optional but recommended
+pip install safetensors>=0.4.0
+pip install sentencepiece>=0.2.0
+pip install protobuf>=3.20.0
+pip install "numpy<2.0.0"
+
+# Verify critical imports
+echo "Verifying installation..."
+python -c "
+import sys
+errors = []
+
+try:
+    import torch
+    print(f'✓ PyTorch {torch.__version__}')
+    # Check CUDA availability without initializing it (for ZeroGPU)
+    print(f'  CUDA available: Will be checked at runtime')
+except ImportError as e:
+    errors.append(f'✗ PyTorch: {e}')
+
+try:
+    import transformers
+    print(f'✓ Transformers {transformers.__version__}')
+except ImportError as e:
+    errors.append(f'✗ Transformers: {e}')
+
+try:
+    import peft
+    print(f'✓ PEFT {peft.__version__}')
+except ImportError as e:
+    errors.append(f'✗ PEFT: {e}')
+
+try:
+    import triton
+    print(f'✓ Triton {triton.__version__}')
+except ImportError as e:
+    errors.append(f'✗ Triton: {e}')
+
+try:
+    import openai_harmony
+    print('✓ OpenAI Harmony')
+except ImportError as e:
+    errors.append(f'✗ OpenAI Harmony: {e}')
+
+try:
+    import gradio
+    print(f'✓ Gradio {gradio.__version__}')
+except ImportError as e:
+    errors.append(f'✗ Gradio: {e}')
+
+try:
+    import spaces
+    print('✓ Spaces (ZeroGPU support)')
+except ImportError as e:
+    errors.append(f'✗ Spaces: {e}')
+
+if errors:
+    print('\n❌ Installation issues found:')
+    for error in errors:
+        print(f'  {error}')
+    sys.exit(1)
+else:
+    print('\n✅ All dependencies installed successfully!')
+    print('Note: Model will run in bf16 mode (MX format requires unreleased Triton features)')
+"
+
+echo "Installation complete!"
 
 # Optional but recommended
 pip install safetensors>=0.4.0
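
Since app.py can no longer rely on a setup hook to validate the environment, the embedded verification block could be factored into a standalone script and re-run at Space startup. A hedged sketch (hypothetical file name check_env.py; the module list and output format mirror the python -c block above):

# check_env.py - hypothetical standalone version of install.sh's verify step.
import importlib
import sys

REQUIRED = ["torch", "transformers", "peft", "triton",
            "openai_harmony", "gradio", "spaces"]

errors = []
for name in REQUIRED:
    try:
        module = importlib.import_module(name)
        version = getattr(module, "__version__", "")
        print(f"✓ {name} {version}".rstrip())
    except ImportError as exc:
        errors.append(f"✗ {name}: {exc}")

if errors:
    print("\n❌ Installation issues found:")
    for line in errors:
        print(f"  {line}")
    sys.exit(1)
print("\n✅ All dependencies installed successfully!")

Running python check_env.py in the Space's startup path would surface a missing wheel before the first request rather than only at build time.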