JusteLeo committed on
Commit
348e6ff
·
verified ·
1 Parent(s): 9d0a525

Upload 8 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ qwen3-0.6B-f16.gguf filter=lfs diff=lfs merge=lfs -text
37
+ qwen3-0.6B-f32.gguf filter=lfs diff=lfs merge=lfs -text
38
+ qwen3-0.6B-Q3_K.gguf filter=lfs diff=lfs merge=lfs -text
39
+ qwen3-0.6B-Q4_0.gguf filter=lfs diff=lfs merge=lfs -text
40
+ qwen3-0.6B-Q4_K_M.gguf filter=lfs diff=lfs merge=lfs -text
41
+ qwen3-0.6B-Q5_K_M.gguf filter=lfs diff=lfs merge=lfs -text
42
+ qwen3-0.6B-Q8_0.gguf filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,110 @@
1
- ---
2
- license: mit
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Qwen3-0.6B-T5-xxl-GGUF
2
+
3
+ ## Model Description
4
+
5
+ This repository provides GGUF quantized versions of the `Qwen3-0.6B-T5-xxl` model body. These models are designed for fast, low-resource inference on CPUs.
6
+
7
+ The goal of this project is to replicate the embedding outputs of `google/t5-v1_1-xxl` using a highly optimized pipeline.
8
+
9
+ To make this repository fully functional out of the box, the fine-tuned **projection head is also included**. This allows you to combine the GGUF model with the PyTorch-based head to get the final 4096-dimensional embeddings.
10
+
11
+ ## Repository Contents
12
+
13
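+ - `qwen3-0.6B-Q4_K_M.gguf`: The model body quantized using the Q4_K_M method. Other variants are provided as well: `Q3_K`, `Q4_0`, `Q5_K_M`, and `Q8_0` quantizations, plus half-precision `f16` and full-precision `f32` files.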
14
+ - `projection_head/projection_head.pth`: The PyTorch state dictionary for the projection head.
15
+
16
+ ## How to Use: Hybrid GGUF + PyTorch Pipeline
17
+
18
+ This tutorial shows how to use the GGUF model for fast base embedding generation and the PyTorch head for the final projection.
19
+
20
+ ### Step 1: Prerequisites
21
+
22
+ First, install the necessary libraries. `llama-cpp-python` is required to run GGUF models.
23
+
24
+ ```bash
25
+ pip install llama-cpp-python torch numpy
26
+ ```
27
+
28
+ ### Step 2: Inference Script
29
+
30
+ The following script encapsulates the entire hybrid pipeline into a convenient class. You can save it as a `.py` file and import it into your projects.
31
+
32
+ ```python
33
+ import torch
34
+ from torch import nn
35
+ from llama_cpp import Llama
36
+ import numpy as np
37
+
38
class HybridEmbedder:
    """Sentence embedder combining a GGUF model body with a PyTorch projection head.

    Both models are loaded once in ``__init__`` so that repeated calls to
    :meth:`get_embedding` only pay for inference.

    Attributes:
        body_model: The ``llama_cpp.Llama`` instance, created with
            ``embedding=True``.
        head_model: ``Linear -> GELU -> Dropout -> Linear`` projection head,
            switched to eval mode after its weights are loaded.
    """

    def __init__(
        self,
        gguf_path: str,
        head_path: str,
        n_ctx: int = 512,
        *,
        hidden_dim: int = 2048,
        output_dim: int = 4096,
    ):
        """Load the GGUF body and the projection head.

        Args:
            gguf_path: Path to the ``.gguf`` model body.
            head_path: Path to the ``.pth`` state dict of the projection head.
            n_ctx: Context size passed to ``Llama``.
            hidden_dim: Width of the head's hidden layer. Must match the
                checkpoint; the default matches the previously hard-coded value.
            output_dim: Size of the final embedding. Must match the
                checkpoint; the default matches the previously hard-coded value.
        """
        print("Initializing HybridEmbedder...")

        # 1. Load the GGUF body
        print(f"Loading GGUF body from: {gguf_path}")
        self.body_model = Llama(
            model_path=gguf_path,
            embedding=True,
            n_ctx=n_ctx,
            verbose=False,
        )
        print(" -> GGUF body loaded.")

        # 2. Load the PyTorch projection head
        print(f"Loading projection head from: {head_path}")
        input_dim = self.body_model.n_embd()

        self.head_model = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_dim, output_dim),
        )
        # map_location="cpu": a checkpoint saved from a GPU session would
        # otherwise fail to load on a CPU-only host.
        # weights_only=True: a state dict holds only tensors, so refuse to
        # unpickle arbitrary objects from a downloaded file.
        state_dict = torch.load(head_path, map_location="cpu", weights_only=True)
        self.head_model.load_state_dict(state_dict)
        self.head_model.eval()  # disables the Dropout layer for inference
        print(" -> Projection head loaded.")
        print("\n✅ Embedder is ready to use.")

    def get_embedding(self, text: str) -> torch.Tensor:
        """Return the projected sentence embedding for ``text``.

        Args:
            text: The input string to embed.

        Returns:
            A tensor of shape ``(1, D)``, where ``D`` is the output size of
            ``head_model``.

        Raises:
            ValueError: If the body model returns no embedding values.
        """
        # a) Get the embeddings from the GGUF model.
        # NOTE(review): depending on the model's pooling configuration,
        # `embed` may return one vector per token (2-D) or a single
        # already-pooled vector (1-D) -- confirm against llama-cpp-python docs.
        raw_embeddings = np.asarray(self.body_model.embed(text), dtype=np.float64)
        if raw_embeddings.size == 0:
            raise ValueError("The GGUF model returned no embeddings for the given text.")
        if raw_embeddings.ndim == 1:
            # Already one vector: treat it as a single row so the mean below
            # is a no-op instead of collapsing the vector to a scalar.
            raw_embeddings = raw_embeddings[np.newaxis, :]

        # b) Apply mean pooling to get a single sentence vector
        sentence_embedding = raw_embeddings.mean(axis=0)

        # c) Convert to a float32 PyTorch tensor and add a batch dimension
        sentence_tensor = torch.from_numpy(sentence_embedding).unsqueeze(0).float()

        # d) Pass through the projection head without tracking gradients
        with torch.no_grad():
            final_embedding = self.head_model(sentence_tensor)

        return final_embedding
88
+
89
+ # --- Example Usage ---
90
if __name__ == "__main__":
    # Local model files: the quantized body and the projection head weights.
    GGUF_FILE, HEAD_FILE = (
        "qwen3-0.6B-Q4_K_M.gguf",
        "./projection_head/projection_head.pth",
    )

    # Build the pipeline once; both models are loaded here.
    embedder = HybridEmbedder(head_path=HEAD_FILE, gguf_path=GGUF_FILE)

    # Embed a sample prompt.
    prompt = "A sprawling fantasy city built into a giant tree."
    embedding = embedder.get_embedding(prompt)

    # Report the prompt, the output shape and the first few values.
    print("\n--- Inference Test ---")
    print(f"Prompt: '{prompt}'")
    print(f"Output dimension: {embedding.shape}")
    print(f"Vector excerpt: {embedding[0, :5]}...")
106
+ ```
107
+
108
+ ## License
109
+
110
+ This repository is licensed under the **MIT License**.
qwen3-0.6B-Q3_K.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:148dcefa5526e89a80f694dbafaa5b250bb00b1b3f6e68bc009722dd7a629796
3
+ size 346896032
qwen3-0.6B-Q4_0.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ffaad38b7334551e0ffba91f3cc08017a1ee418b8e40f8b61048ee20e41da23
3
+ size 381335200
qwen3-0.6B-Q4_K_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d058f00eeba52a245283d1a3ff8c13e91c6e6ced1f0363f37c620c3e34316ee
3
+ size 396474016
qwen3-0.6B-Q5_K_M.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:110961bdcc0c3106d29c5c976df1712bf1999c93ac828d3ed457b7d326112e7d
3
+ size 444184224
qwen3-0.6B-Q8_0.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:739d45f900f3fe7af64d174de5646a6fb78aa8e4bb1fde5e1a15f82cbce0496b
3
+ size 639150048
qwen3-0.6B-f16.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dd44d5dc85e73b91bc423092e3b6121c4b97195ad129c9359c575c3929c32ad
3
+ size 1197629088
qwen3-0.6B-f32.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:155984a1ddc3e652a50cff9504e1e2dba1529e9fe5e062519139e4cf5ce80dc4
3
+ size 2389051040