aifeifei798's picture
Upload 11 files
53a8b66 verified
raw
history blame
908 Bytes
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
import bitsandbytes as bnb
# Path to the local model checkpoint to quantize.
model_name = "./Llama-3.1-Nemotron-Nano-8B-v1"

# Modules excluded from quantization (presumably kept in higher precision —
# verify against bitsandbytes docs for the exact skip semantics).
_skip_modules = [
    "lm_head",
    "multi_modal_projector",
    "merger",
    "modality_projection",
    "model.layers.1.mlp",
]

# 4-bit NF4 quantization with nested (double) quantization; compute in bfloat16.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    llm_int8_skip_modules=_skip_modules,
)
# Single source of truth for the output location: the original repeated the
# directory string twice, so a typo in one copy would split model/tokenizer.
output_dir = "Llama-3.1-Nemotron-Nano-8B-v1-bnb-4bit"

# Load the model, applying 4-bit quantization on the fly per the config above.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=quantization_config,
    device_map="auto",  # automatically place layers on available devices
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Persist the quantized model together with its tokenizer.
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)