mlx-my-repo

Sleeping

App Files Files Community

dicksonhk committed on Jun 18

Commit

0aca28b

1 Parent(s): d1518f3

feat: mlx-vlm support

Browse files

Files changed (2) hide show

app.py +169 -28
requirements.txt +5 -1

app.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import os
 import tempfile
 os.environ["HF_HUB_CACHE"] = "cache"
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
@@ -15,9 +17,40 @@ from gradio_huggingfacehub_search import HuggingfaceHubSearch
 from apscheduler.schedulers.background import BackgroundScheduler
 from textwrap import dedent
 import mlx_lm
-from mlx_lm import convert
 HF_TOKEN = os.environ.get("HF_TOKEN")
@@ -30,6 +63,64 @@ QUANT_PARAMS = {
     "Q8": 8,
 }
 def list_files_in_folder(folder_path):
     # List all files and directories in the specified folder
     all_items = os.listdir(folder_path)
@@ -48,7 +139,7 @@ def clear_hf_cache_space():
     scan.delete_revisions(*to_delete).execute()
     print("Cache has been cleared")
-def upload_to_hub(path, upload_repo, hf_path, oauth_token):
     card = ModelCard.load(hf_path, token=oauth_token.token)
     card.data.tags = ["mlx"] if card.data.tags is None else card.data.tags + ["mlx", "mlx-my-repo"]
     card.data.base_model = hf_path
@@ -56,29 +147,9 @@ def upload_to_hub(path, upload_repo, hf_path, oauth_token):
         f"""
         # {upload_repo}
-        The Model [{upload_repo}](https://huggingface.co/{upload_repo}) was converted to MLX format from [{hf_path}](https://huggingface.co/{hf_path}) using mlx-lm version **{mlx_lm.__version__}**.
-        ## Use with mlx
-        ```bash
-        pip install mlx-lm
-        ```
-        ```python
-        from mlx_lm import load, generate
-        model, tokenizer = load("{upload_repo}")
-        prompt="hello"
-        if hasattr(tokenizer, "apply_chat_template") and tokenizer.chat_template is not None:
-            messages = [{{"role": "user", "content": prompt}}]
-            prompt = tokenizer.apply_chat_template(
-                messages, tokenize=False, add_generation_prompt=True
-            )
-        response = generate(model, tokenizer, prompt=prompt, verbose=True)
-        ```
         """
     )
     card.save(os.path.join(path, "README.md"))
@@ -101,6 +172,76 @@ def upload_to_hub(path, upload_repo, hf_path, oauth_token):
     print(f"Upload successful, go to https://huggingface.co/{upload_repo} for details.")
 def process_model(model_id, q_method, oauth_token: gr.OAuthToken | None):
     if oauth_token.token is None:
         raise ValueError("You must be logged in to use MLX-my-repo")
@@ -113,9 +254,9 @@ def process_model(model_id, q_method, oauth_token: gr.OAuthToken | None):
             with tempfile.TemporaryDirectory(dir="converted") as tmpdir:
                 # The target directory must not exist
                 mlx_path = os.path.join(tmpdir, "mlx")
-                convert(model_id, mlx_path=mlx_path, quantize=False, dtype="float16")
                 print("Conversion done")
-                upload_to_hub(path=mlx_path, upload_repo=upload_repo, hf_path=model_id, oauth_token=oauth_token)
                 print("Upload done")
         else:
             q_bits = QUANT_PARAMS[q_method]
@@ -123,9 +264,9 @@ def process_model(model_id, q_method, oauth_token: gr.OAuthToken | None):
             with tempfile.TemporaryDirectory(dir="converted") as tmpdir:
                 # The target directory must not exist
                 mlx_path = os.path.join(tmpdir, "mlx")
-                convert(model_id, mlx_path=mlx_path, quantize=True, q_bits=q_bits)
                 print("Conversion done")
-                upload_to_hub(path=mlx_path, upload_repo=upload_repo, hf_path=model_id, oauth_token=oauth_token)
                 print("Upload done")
         return (
             f'Find your repo <a href="https://hf.co/{upload_repo}" target="_blank" style="text-decoration:underline">here</a>',

 import os
 import tempfile
+import importlib.util
+from enum import Enum
 os.environ["HF_HUB_CACHE"] = "cache"
 os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
 from apscheduler.schedulers.background import BackgroundScheduler
 from textwrap import dedent
+from typing import (
+    Callable,
+    Dict,
+    Optional,
+    Union,
+    NamedTuple,
+)
+import mlx.nn as nn
 import mlx_lm
+from mlx_lm.utils import (
+    load_config,
+    get_model_path,
+)
+import mlx_vlm
+# Alias tables used when deciding which converter supports a model.
+# Each key is a "model_type" value as read from a model's config; each value is
+# the module name that is looked up under `<package>.models` for that type.
+#
+# mlx-lm/mlx_lm/utils.py (presumably mirrors the table of the same purpose in
+# that file -- TODO confirm it is still in sync with the installed mlx-lm)
+MODEL_REMAPPING_MLX_LM = {
+    "mistral": "llama",  # mistral is compatible with llama
+    "phi-msft": "phixtral",
+    "falcon_mamba": "mamba",
+}
+# mlx-vlm/mlx_vlm/utils.py (presumably mirrors the mlx-vlm equivalent -- TODO
+# confirm it is still in sync with the installed mlx-vlm)
+MODEL_REMAPPING_MLX_VLM = {
+    "llava-qwen2": "llava_bunny",
+    "bunny-llama": "llava_bunny",
+}
+# Combined lookup table. On a duplicate key the mlx-vlm entry would win because
+# it is unpacked last; the two tables above currently share no keys.
+MODEL_REMAPPING = {
+    **MODEL_REMAPPING_MLX_LM,
+    **MODEL_REMAPPING_MLX_VLM,
+}
 HF_TOKEN = os.environ.get("HF_TOKEN")
     "Q8": 8,
 }
+class RuntimeInfo(NamedTuple):
+    """Immutable record describing one MLX conversion runtime.
+
+    The fields supply the text that ends up in a generated model card
+    (format, package name, version, usage snippet) together with the
+    runtime's conversion entry point.
+    """
+    # Human-readable runtime name, e.g. "MLX LM".
+    name: str
+    # Package name as quoted in the model card, e.g. "mlx-lm".
+    package: str
+    # Version string of the package, quoted in the model card.
+    version: str
+    # The runtime's conversion function. NOTE(review): nothing in the code
+    # visible here calls this field -- confirm whether it is still needed.
+    convert_fn: Callable
+    # Given the upload repo id, returns the usage section for the model card.
+    usage_example: Callable[[str], str]
+    # Name of the output format quoted in the model card.
+    format: str = "MLX"
+class Runtime(RuntimeInfo, Enum):
+    """Closed set of supported conversion runtimes.
+
+    Each member is built from a ``RuntimeInfo`` tuple, so its fields
+    (``package``, ``version``, ``usage_example``, ``format``, ...) are
+    available directly on the member.
+
+    NOTE(review): the ``name`` field shares its name with Enum's built-in
+    ``name`` attribute -- confirm which of the two ``Runtime.X.name``
+    resolves to before relying on it.
+    """
+    # Text-only models converted with mlx-lm. The usage snippet is a Python
+    # example; doubled braces ({{ }}) are f-string escapes for literal braces.
+    MLX_LM = RuntimeInfo(
+        name="MLX LM",
+        package="mlx-lm",
+        version=mlx_lm.__version__,
+        convert_fn=mlx_lm.convert,
+        usage_example=lambda upload_repo: dedent(
+            f"""
+            ## Use with mlx
+            ```bash
+            pip install mlx-lm
+            ```
+            ```python
+            from mlx_lm import load, generate
+            model, tokenizer = load("{upload_repo}")
+            prompt="hello"
+            if hasattr(tokenizer, "apply_chat_template") and tokenizer.chat_template is not None:
+                messages = [{{"role": "user", "content": prompt}}]
+                prompt = tokenizer.apply_chat_template(
+                    messages, tokenize=False, add_generation_prompt=True
+                )
+            response = generate(model, tokenizer, prompt=prompt, verbose=True)
+            ```
+            """
+        )
+    )
+    # Vision-language models converted with mlx-vlm. The usage snippet is a
+    # command-line example; unlike the mlx-lm snippet it has no
+    # "## Use with mlx" heading.
+    MLX_VLM = RuntimeInfo(
+        name="MLX-VLM",
+        package="mlx-vlm",
+        version=mlx_vlm.__version__,
+        convert_fn=mlx_vlm.convert,
+        usage_example=lambda upload_repo: dedent(
+            f"""
+            ```bash
+            pip install -U mlx-vlm
+            ```
+            ```bash
+            python -m mlx_vlm.generate --model {upload_repo} --max-tokens 100 --temp 0.0 --prompt "Describe this image." --image <path_to_image>
+            ```
+            """
+        )
+    )
 def list_files_in_folder(folder_path):
     # List all files and directories in the specified folder
     all_items = os.listdir(folder_path)
     scan.delete_revisions(*to_delete).execute()
     print("Cache has been cleared")
+def upload_to_hub(path, upload_repo, hf_path, oauth_token, runtime: Runtime):
     card = ModelCard.load(hf_path, token=oauth_token.token)
     card.data.tags = ["mlx"] if card.data.tags is None else card.data.tags + ["mlx", "mlx-my-repo"]
     card.data.base_model = hf_path
         f"""
         # {upload_repo}
+        The Model [{upload_repo}](https://huggingface.co/{upload_repo}) was converted to {runtime.format} format from [{hf_path}](https://huggingface.co/{hf_path}) using {runtime.package} version **{runtime.version}**.
+        {runtime.usage_example(upload_repo)}
         """
     )
     card.save(os.path.join(path, "README.md"))
     print(f"Upload successful, go to https://huggingface.co/{upload_repo} for details.")
+def convert(
+    hf_path: str,
+    mlx_path: str = "mlx_model",
+    quantize: bool = False,
+    q_group_size: int = 64,
+    q_bits: int = 4,
+    dtype: Optional[str] = None,
+    upload_repo: str = None,
+    revision: Optional[str] = None,
+    dequantize: bool = False,
+    quant_predicate: Optional[
+        Union[Callable[[str, nn.Module, dict], Union[bool, dict]], str]
+    ] = None, # mlx-lm
+    skip_vision: bool = False, # mlx-vlm
+    trust_remote_code: bool = True, # mlx-vlm
+) -> Runtime :
+    def mlx_lm_convert():
+        mlx_lm.convert(
+            hf_path=hf_path,
+            mlx_path=mlx_path,
+            quantize=quantize,
+            q_group_size=q_group_size,
+            q_bits=q_bits,
+            dtype=dtype,
+            upload_repo=upload_repo,
+            revision=revision,
+            dequantize=dequantize,
+            quant_predicate=quant_predicate,
+        )
+    def mlx_vlm_convert():
+        mlx_vlm.convert(
+            hf_path=hf_path,
+            mlx_path=mlx_path,
+            quantize=quantize,
+            q_group_size=q_group_size,
+            q_bits=q_bits,
+            dtype=dtype,
+            upload_repo=upload_repo,
+            revision=revision,
+            dequantize=dequantize,
+            skip_vision=skip_vision,
+            trust_remote_code=trust_remote_code,
+        )
+    model_path = get_model_path(hf_path, revision=revision)
+    config = load_config(model_path)
+    model_type = config["model_type"]
+    model_type = MODEL_REMAPPING.get(model_type, model_type)
+    is_lm = importlib.util.find_spec(f"mlx_lm.models.{model_type}") is not None
+    is_vlm = importlib.util.find_spec(f"mlx_vlm.models.{model_type}") is not None
+    if is_lm and (not is_vlm):
+        mlx_lm_convert()
+        runtime = Runtime.MLX_LM
+    elif is_vlm and (not is_lm):
+        mlx_vlm_convert()
+        runtime = Runtime.MLX_VLM
+    else:
+        # fallback in-case our MODEL_REMAPPING is outdated
+        try:
+            mlx_vlm_convert()
+            runtime = Runtime.MLX_VLM
+        except Exception as e:
+            mlx_lm_convert()
+            runtime = Runtime.MLX_LM
+    return runtime
 def process_model(model_id, q_method, oauth_token: gr.OAuthToken | None):
     if oauth_token.token is None:
         raise ValueError("You must be logged in to use MLX-my-repo")
             with tempfile.TemporaryDirectory(dir="converted") as tmpdir:
                 # The target directory must not exist
                 mlx_path = os.path.join(tmpdir, "mlx")
+                runtime = convert(model_id, mlx_path=mlx_path, quantize=False, dtype="float16")
                 print("Conversion done")
+                upload_to_hub(path=mlx_path, upload_repo=upload_repo, hf_path=model_id, oauth_token=oauth_token, runtime=runtime)
                 print("Upload done")
         else:
             q_bits = QUANT_PARAMS[q_method]
             with tempfile.TemporaryDirectory(dir="converted") as tmpdir:
                 # The target directory must not exist
                 mlx_path = os.path.join(tmpdir, "mlx")
+                runtime = convert(model_id, mlx_path=mlx_path, quantize=True, q_bits=q_bits)
                 print("Conversion done")
+                upload_to_hub(path=mlx_path, upload_repo=upload_repo, hf_path=model_id, oauth_token=oauth_token, runtime=runtime)
                 print("Upload done")
         return (
             f'Find your repo <a href="https://hf.co/{upload_repo}" target="_blank" style="text-decoration:underline">here</a>',

requirements.txt CHANGED Viewed

@@ -1,6 +1,10 @@
 huggingface-hub
 hf-transfer
 gradio[oauth]>=4.28.0
 gradio_huggingfacehub_search==0.0.7
 APScheduler
-mlx-lm

 huggingface-hub
 hf-transfer
 gradio[oauth]>=4.28.0
+gradio<5.0,>=4.0 # gradio-huggingfacehub-search 0.0.7 requires gradio<5.0,>=4.0
 gradio_huggingfacehub_search==0.0.7
 APScheduler
+mlx-lm
+mlx-vlm
+torch
+torchvision