fix: update code

Files changed (7)

conver_to_onnx.py → convert_to_onnx.py +25 -25
onnx/added_tokens.json +0 -3
onnx/config.json +0 -28
onnx/model_quantized.onnx +3 -0
onnx/special_tokens_map.json +0 -51
test_model.js +13 -9
onnx/tokenizer_config.json → tokenizer.json +0 -0

conver_to_onnx.py → convert_to_onnx.py RENAMED Viewed

@@ -1,5 +1,6 @@
 from pathlib import Path
 import onnx
 from onnxconverter_common import float16
 from onnxruntime.quantization import quantize_dynamic, QuantType
@@ -18,37 +19,36 @@ print("Exporting the FP32 model...")
 model = ORTModelForFeatureExtraction.from_pretrained(model_name, export=True)
 model.save_pretrained(output_dir)
-# Save the tokenizer alongside the model
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-tokenizer.save_pretrained(Path("."))
-# Define FP32 model path
 model_fp32_path = output_dir / "model.onnx"
 # -------------------------------------------
-# Step 2: Convert FP32 model to FP16
 # -------------------------------------------
-print("Converting to FP16...")
-model_fp16_path = output_dir / "model-fp16.onnx"
-# Load the FP32 ONNX model
-model_fp32 = onnx.load(model_fp32_path.as_posix())
-# Convert weights to FP16 while keeping input/output types in FP32 if needed
-model_fp16 = float16.convert_float_to_float16(model_fp32, keep_io_types=True)
-# Save the FP16 model
-onnx.save(model_fp16, model_fp16_path.as_posix())
-# -------------------------------------------
-# Step 3: Convert FP32 model to INT8 (Dynamic Quantization)
-# -------------------------------------------
-print("Converting to INT8 (dynamic quantization)...")
-model_int8_path = output_dir / "model-int8.onnx"
 quantize_dynamic(
-    model_fp32_path.as_posix(),
-    model_int8_path.as_posix(),
-    weight_type=QuantType.QInt8  # Use QInt8 or QUInt8 depending on your requirements
 )
-print("✅ Model conversion complete!")
-print(f"FP32 model: {model_fp32_path}")
-print(f"FP16 model: {model_fp16_path}")
-print(f"INT8 model: {model_int8_path}")

 from pathlib import Path
 import onnx
+import shutil
 from onnxconverter_common import float16
 from onnxruntime.quantization import quantize_dynamic, QuantType
 model = ORTModelForFeatureExtraction.from_pretrained(model_name, export=True)
 model.save_pretrained(output_dir)
+# Save tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_name)
+tokenizer.save_pretrained(output_dir)
+# Define model paths
 model_fp32_path = output_dir / "model.onnx"
+model_quantized_path = output_dir / "model_quantized.onnx"
 # -------------------------------------------
+# Step 2: Quantize to INT8
 # -------------------------------------------
+print("Quantizing to INT8 (dynamic quantization)...")
 quantize_dynamic(
+    model_input=model_fp32_path.as_posix(),
+    model_output=model_quantized_path.as_posix(),
+    weight_type=QuantType.QInt8,
 )
+# -------------------------------------------
+# Step 3: Move JSON files to parent folder
+# -------------------------------------------
+print("Moving JSON files to parent folder...")
+parent_dir = output_dir.parent
+json_files = list(output_dir.glob("*.json"))
+for json_file in json_files:
+    shutil.move(str(json_file), str(parent_dir / json_file.name))
+print("✅ Conversion complete!")
+print(f"Original FP32 model: {model_fp32_path}")
+print(f"Quantized INT8 model: {model_quantized_path}")
+print(f"Tokenizer files moved to: {[f.name for f in json_files]}")
+print(f"ONNX files remain in: {output_dir}")

onnx/added_tokens.json DELETED Viewed

@@ -1,3 +0,0 @@
-{
-  "<mask>": 64000
-}

onnx/config.json DELETED Viewed

@@ -1,28 +0,0 @@
-{
-  "architectures": [
-    "RobertaModel"
-  ],
-  "attention_probs_dropout_prob": 0.1,
-  "bos_token_id": 0,
-  "classifier_dropout": null,
-  "eos_token_id": 2,
-  "gradient_checkpointing": false,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
-  "initializer_range": 0.02,
-  "intermediate_size": 3072,
-  "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 258,
-  "model_type": "roberta",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pad_token_id": 1,
-  "position_embedding_type": "absolute",
-  "tokenizer_class": "PhobertTokenizer",
-  "torch_dtype": "float32",
-  "transformers_version": "4.50.3",
-  "type_vocab_size": 1,
-  "use_cache": true,
-  "vocab_size": 64001
-}

onnx/model_quantized.onnx ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7292219a5f4f8c1f5a71fd2c1688debeb67167a180e4186b3c78c27bfe257c83
+size 135252876

onnx/special_tokens_map.json DELETED Viewed

@@ -1,51 +0,0 @@
-{
-  "bos_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "cls_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "mask_token": {
-    "content": "<mask>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "<pad>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "sep_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
-}

test_model.js CHANGED Viewed

@@ -1,14 +1,18 @@
-// Force offline mode to prevent remote lookups.
-process.env.HF_HUB_OFFLINE = "1";
-import { AutoModel } from '@xenova/transformers';
 async function run() {
-  // Since test_model.js is inside your model folder,
-  // use '.' to refer to the current folder.
-  const model = await AutoModel.from_pretrained('.', { localFilesOnly: true });
-  const output = await model('Mô hình thử nghiệm');
-  console.log(output);
 }
 run();

+import { pipeline } from '@xenova/transformers';
 async function run() {
+    // Load the pipeline for feature extraction (embeddings)
+    const extractor = await pipeline(
+        'feature-extraction',
+        'laituanmanh32/vietnamese-embedding-onnx',
+        {
+            quantized: false, // Disable quantization (if using full-precision ONNX)
+        }
+    );
+    // Generate embeddings for Vietnamese text
+    const output = await extractor('Mô hình thử nghiệm', { pooling: 'mean' });
+    console.log(output);
 }
 run();

onnx/tokenizer_config.json → tokenizer.json RENAMED Viewed

File without changes