Upload 8 files
- .gitattributes +7 -0
- README.md +110 -3
- qwen3-0.6B-Q3_K.gguf +3 -0
- qwen3-0.6B-Q4_0.gguf +3 -0
- qwen3-0.6B-Q4_K_M.gguf +3 -0
- qwen3-0.6B-Q5_K_M.gguf +3 -0
- qwen3-0.6B-Q8_0.gguf +3 -0
- qwen3-0.6B-f16.gguf +3 -0
- qwen3-0.6B-f32.gguf +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+qwen3-0.6B-f16.gguf filter=lfs diff=lfs merge=lfs -text
+qwen3-0.6B-f32.gguf filter=lfs diff=lfs merge=lfs -text
+qwen3-0.6B-Q3_K.gguf filter=lfs diff=lfs merge=lfs -text
+qwen3-0.6B-Q4_0.gguf filter=lfs diff=lfs merge=lfs -text
+qwen3-0.6B-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+qwen3-0.6B-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
+qwen3-0.6B-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
README.md
CHANGED
@@ -1,3 +1,110 @@
# Qwen3-0.6B-T5-xxl-GGUF

## Model Description

This repository provides GGUF-quantized versions of the `Qwen3-0.6B-T5-xxl` model body, designed for fast, low-resource inference on CPUs.

The goal of this project is to replicate the embedding outputs of `google/t5-v1_1-xxl` using a much smaller, highly optimized pipeline.

To make this repository fully functional out of the box, the fine-tuned **projection head is also included**. This allows you to combine the GGUF model body with the PyTorch-based head to obtain the final 4096-dimensional embeddings.
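
To make the data flow concrete, here is a shape-level sketch of the two stages. The 1024-wide hidden state is an assumption for Qwen3-0.6B; the full script below reads the actual width from the loaded model via `n_embd()` rather than hardcoding it.

```python
import torch
from torch import nn

# Shape-level sketch of the pipeline. body_dim = 1024 is an assumed value
# for Qwen3-0.6B's hidden size; the real script queries it at runtime.
body_dim, hidden_dim, out_dim = 1024, 2048, 4096

# Same head architecture the included projection_head.pth weights expect
head = nn.Sequential(
    nn.Linear(body_dim, hidden_dim),
    nn.GELU(),
    nn.Dropout(0.1),
    nn.Linear(hidden_dim, out_dim),
)

pooled = torch.randn(1, body_dim)  # stand-in for a mean-pooled GGUF embedding
print(head(pooled).shape)          # torch.Size([1, 4096])
```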

## Repository Contents

- `qwen3-0.6B-*.gguf`: The model body in several quantizations (Q3_K, Q4_0, Q4_K_M, Q5_K_M, Q8_0), plus unquantized f16 and f32 conversions.
- `projection_head/projection_head.pth`: The PyTorch state dictionary for the final projection layer.
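
If you prefer to fetch individual files programmatically instead of cloning the whole repository, here is a minimal sketch using `huggingface_hub`; the `repo_id` below is a placeholder, so substitute this repository's actual path on the Hub.

```python
# Minimal download sketch (pip install huggingface_hub).
# NOTE: the repo_id below is a placeholder -- replace it with this
# repository's actual path on the Hugging Face Hub.
from huggingface_hub import hf_hub_download

gguf_path = hf_hub_download(
    repo_id="your-username/Qwen3-0.6B-T5-xxl-GGUF",  # placeholder
    filename="qwen3-0.6B-Q4_K_M.gguf",
)
head_path = hf_hub_download(
    repo_id="your-username/Qwen3-0.6B-T5-xxl-GGUF",  # placeholder
    filename="projection_head/projection_head.pth",
)
```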

## How to Use: Hybrid GGUF + PyTorch Pipeline

This tutorial shows how to use the GGUF model for fast base-embedding generation and the PyTorch head for the final projection.

### Step 1: Prerequisites

First, install the necessary libraries. `llama-cpp-python` is required to run GGUF models.

```bash
pip install llama-cpp-python torch numpy
```

### Step 2: Inference Script

The following script encapsulates the entire hybrid pipeline in a convenient class. You can save it as a `.py` file and import it into your projects.

```python
import numpy as np
import torch
from torch import nn
from llama_cpp import Llama


class HybridEmbedder:
    """
    Encapsulates the hybrid embedding pipeline.
    Loads both models once at initialization for optimal performance.
    """
    def __init__(self, gguf_path: str, head_path: str, n_ctx: int = 512):
        print("Initializing HybridEmbedder...")

        # 1. Load the GGUF body in embedding mode
        print(f"Loading GGUF body from: {gguf_path}")
        self.body_model = Llama(
            model_path=gguf_path,
            embedding=True,
            n_ctx=n_ctx,
            verbose=False,
        )
        print(" -> GGUF body loaded.")

        # 2. Rebuild the projection head and load its weights. The hidden
        #    and output sizes must match the architecture used in training.
        print(f"Loading projection head from: {head_path}")
        input_dim = self.body_model.n_embd()
        hidden_dim = 2048
        output_dim = 4096

        self.head_model = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, output_dim),
        )
        self.head_model.load_state_dict(torch.load(head_path, map_location="cpu"))
        self.head_model.eval()
        print(" -> Projection head loaded.")
        print("\n✅ Embedder is ready to use.")

    def get_embedding(self, text: str) -> torch.Tensor:
        # a) Get the sequence of per-token embeddings from the GGUF model
        token_embeddings = self.body_model.embed(text)

        # b) Mean-pool the token embeddings into a single sentence vector
        sentence_embedding = np.mean(token_embeddings, axis=0)

        # c) Convert to a PyTorch tensor and add a batch dimension
        sentence_tensor = torch.tensor(sentence_embedding).unsqueeze(0)

        # d) Project up to the final 4096-dimensional space
        with torch.no_grad():
            final_embedding = self.head_model(sentence_tensor.float())

        return final_embedding


# --- Example Usage ---
if __name__ == "__main__":
    # Define the paths to your local model files
    GGUF_FILE = "qwen3-0.6B-Q4_K_M.gguf"
    HEAD_FILE = "./projection_head/projection_head.pth"

    # Create an instance of the embedder
    embedder = HybridEmbedder(gguf_path=GGUF_FILE, head_path=HEAD_FILE)

    # Use the embedder to get vectors
    prompt = "A sprawling fantasy city built into a giant tree."
    embedding = embedder.get_embedding(prompt)

    print("\n--- Inference Test ---")
    print(f"Prompt: '{prompt}'")
    print(f"Output dimension: {embedding.shape}")
    print(f"Vector excerpt: {embedding[0, :5]}...")
```
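
Since the goal is to approximate `google/t5-v1_1-xxl`, you may want to sanity-check the hybrid output against the reference encoder. Below is a minimal verification sketch; it assumes `transformers` and `sentencepiece` are installed, that mean pooling matches how the head was trained, and that the script above was saved as `hybrid_embedder.py` (a placeholder name). Note that loading the xxl encoder itself requires a very large amount of memory.

```python
# Verification sketch: compare the hybrid pipeline against the reference
# T5 encoder. Assumes transformers + sentencepiece are installed.
import torch
from transformers import AutoTokenizer, T5EncoderModel

# The script above, saved locally as hybrid_embedder.py (placeholder name)
from hybrid_embedder import HybridEmbedder

embedder = HybridEmbedder("qwen3-0.6B-Q4_K_M.gguf", "./projection_head/projection_head.pth")

tokenizer = AutoTokenizer.from_pretrained("google/t5-v1_1-xxl")
reference = T5EncoderModel.from_pretrained("google/t5-v1_1-xxl")  # needs lots of RAM
reference.eval()

text = "A sprawling fantasy city built into a giant tree."
inputs = tokenizer(text, return_tensors="pt")
with torch.no_grad():
    hidden = reference(**inputs).last_hidden_state  # (1, seq_len, 4096)
ref_embedding = hidden.mean(dim=1)                  # mean pooling -> (1, 4096)

hybrid_embedding = embedder.get_embedding(text)
similarity = torch.nn.functional.cosine_similarity(ref_embedding, hybrid_embedding)
print(f"Cosine similarity vs. t5-v1_1-xxl: {similarity.item():.4f}")
```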

## License

This repository is licensed under the **MIT License**.
qwen3-0.6B-Q3_K.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:148dcefa5526e89a80f694dbafaa5b250bb00b1b3f6e68bc009722dd7a629796
+size 346896032
qwen3-0.6B-Q4_0.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2ffaad38b7334551e0ffba91f3cc08017a1ee418b8e40f8b61048ee20e41da23
+size 381335200
qwen3-0.6B-Q4_K_M.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1d058f00eeba52a245283d1a3ff8c13e91c6e6ced1f0363f37c620c3e34316ee
+size 396474016
qwen3-0.6B-Q5_K_M.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:110961bdcc0c3106d29c5c976df1712bf1999c93ac828d3ed457b7d326112e7d
+size 444184224
qwen3-0.6B-Q8_0.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:739d45f900f3fe7af64d174de5646a6fb78aa8e4bb1fde5e1a15f82cbce0496b
+size 639150048
qwen3-0.6B-f16.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3dd44d5dc85e73b91bc423092e3b6121c4b97195ad129c9359c575c3929c32ad
+size 1197629088
qwen3-0.6B-f32.gguf
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:155984a1ddc3e652a50cff9504e1e2dba1529e9fe5e062519139e4cf5ce80dc4
+size 2389051040