Modelo projecte-aina/aina-translator-es-ast-quantized

versión de:

projecte-aina/aina-translator-es-ast cuantizado a qint8

Página web del projecte aina

licencia:

La del proyecto original. Yo solo hice la cuantización y lo publico por conveniencia.

Autor:

Luis Virgós ([email protected])

Detalles

Es un modelo PyTorch, basado en el modelo M2M100. Está cuantizado con torch a qint8.

Para utilizarlo hay que leerlo con torch.load(). Ver ejemplo más abajo.

El tokenizador debe ser facebook/nllb-200-distilled-600M, del proyecto NLLB (No Language Left Behind).

Cómo se generó este modelo

Comienzo

Inicialización

# Notebook setup: quiet down library logging, import the tensor frameworks and
# authenticate against the Hugging Face Hub.
import os
# Must be set before TensorFlow is imported below (presumably silences its
# native log output -- confirm against the TensorFlow docs).
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import warnings
# Ignore every Python warning for the rest of the session.
warnings.filterwarnings("ignore")
import logging
# Only let the "transformers" logger emit messages at ERROR level or above.
logging.getLogger("transformers").setLevel(logging.ERROR)

import torch
import tensorflow as tf

# Read the Hugging Face token stored under 'HF_TOKEN' in the Colab user data
# and use it to log in to the Hub.
from google.colab import userdata
miToken = userdata.get('HF_TOKEN')
from huggingface_hub import login
login(token=miToken)

Leemos modelos

from transformers import (
    M2M100ForConditionalGeneration,
    NllbTokenizer,
    AutoModelForCausalLM,
    GPTQConfig,
)

# Hub identifiers: the translation checkpoint and the tokenizer used with it.
model_name = "projecte-aina/aina-translator-es-ast"
tokenizer_name = "facebook/nllb-200-distilled-600M"

# The tokenizer is configured with "spa_Latn" as its source language.
tokenizer = NllbTokenizer.from_pretrained(
    tokenizer_name,
    token=True,
    src_lang="spa_Latn",
)

# Full-precision model that is quantized later on.
model = M2M100ForConditionalGeneration.from_pretrained(model_name)

# Notebook-level constants.
VOCAB_SIZE = tokenizer.vocab_size
SEQ_LEN = 32
D_EMB = 768

Creamos una función para probar el modelo

def traducir(modelo, texto):
    """Translate ``texto`` with ``modelo`` and print a step-by-step trace.

    Prints the source text, every input token id next to its decoded piece,
    the input tensor, every generated token id next to its decoded piece and
    finally the translation. Generation is forced to start with the
    ``ast_Latn`` language token. Relies on the module-level ``tokenizer``.

    Args:
        modelo: Model exposing ``generate()`` (the original or the quantized one).
        texto: Sentence to translate.

    Returns:
        The decoded translation of the first (only) sequence in the batch.
    """
    print(texto)

    # Ask the tokenizer for PyTorch tensors directly; going through TensorFlow
    # and DLPack only to end up with a torch tensor is unnecessary.
    in_tokens = tokenizer.encode(texto, return_tensors="pt")
    for token_id in in_tokens[0].tolist():
        print(f"{token_id}: \"{tokenizer.decode([token_id])}\"")

    # Inference runs on the CPU.
    device = torch.device('cpu')
    print(f"Using device: {device}")
    in_tokens_pt = in_tokens.to(device)
    print("-------------")
    print(in_tokens_pt)
    print("-------------")

    translated_tokens = modelo.generate(
        in_tokens_pt,
        forced_bos_token_id=tokenizer.convert_tokens_to_ids("ast_Latn"),
    )
    for token_id in translated_tokens[0].tolist():
        print(f"{token_id}: \"{tokenizer.decode([token_id])}\"")

    result = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
    print(result)
    return result


def print_size_of_model(model):
    """Print (and return) the serialized size of ``model``'s state dict in MB.

    The state dict is saved with ``torch.save`` into a throw-away temporary
    directory, so no file in the working directory is overwritten or deleted,
    and the temporary file is cleaned up even if measuring it fails.

    Args:
        model: Object exposing ``state_dict()`` (e.g. a ``torch.nn.Module``).

    Returns:
        Size of the serialized state dict in megabytes (bytes / 1e6).
    """
    import tempfile

    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = os.path.join(tmp_dir, "temp.p")
        torch.save(model.state_dict(), tmp_path)
        size_mb = os.path.getsize(tmp_path) / 1e6
    print('Size (MB):', size_mb)
    return size_mb

Cuantizamos y salvamos

Cuantizamos a qint8, con lo que el modelo queda con aproximadamente la mitad de tamaño.

import os

import torch

# Directory that will hold the quantized artifact.
nombre_cuantizado = "aina-translator-es-ast-quantized"

# Dynamically quantize the torch.nn.Linear modules of `model` to qint8.
quantized_model = torch.quantization.quantize_dynamic(
    model,
    qconfig_spec={torch.nn.Linear},
    dtype=torch.qint8,
)

# Save locally; the saved file is then downloaded and uploaded to Hugging Face.
os.makedirs(nombre_cuantizado, exist_ok=True)
torch.save(quantized_model, f"{nombre_cuantizado}/pytorch_model.pt")

Para probar el modelo subido

# Try out the model that is published on Hugging Face.
import torch
import os
nombre_directorio = "aina-translator-es-ast-quantized"
os.makedirs(nombre_directorio, exist_ok=True)

nombre_fichero = "pytorch_model.pt"

# Log in to Hugging Face so the model can be downloaded.
from google.colab import userdata
miToken = userdata.get('HF_TOKEN')
from huggingface_hub import login
login(token=miToken)
# Download the model file into the local directory.
from huggingface_hub import hf_hub_download
hf_hub_download(repo_id="meinvirgos/" + nombre_directorio, filename=nombre_fichero, local_dir="./" + nombre_directorio)
# Load the model.
nombre_modelo = nombre_directorio + "/" + nombre_fichero
# SECURITY NOTE: weights_only=False unpickles arbitrary Python objects. It is
# required here because the whole quantized module was saved with torch.save,
# so only load files from a repository you trust.
model_quantized = torch.load(nombre_modelo, weights_only=False)
# Load the tokenizer as well.
from transformers import M2M100ForConditionalGeneration, NllbTokenizer, AutoModelForCausalLM, GPTQConfig
tokenizer_name = "facebook/nllb-200-distilled-600M"
tokenizer = NllbTokenizer.from_pretrained(tokenizer_name, token=True, src_lang="spa_Latn")

# Now the actual test.
# Request PyTorch tensors straight from the tokenizer: no TensorFlow/DLPack
# round-trip (and no TensorFlow install) is needed to feed a PyTorch model.
in_tokens = tokenizer.encode("Yo era rubio", return_tensors="pt")
device = torch.device('cpu')
in_tokens_pt = in_tokens.to(device)
# Generate the answer, forcing the "ast_Latn" language token first.
translated_tokens = model_quantized.generate(in_tokens_pt, forced_bos_token_id=tokenizer.convert_tokens_to_ids("ast_Latn"))
# Decode the answer.
result = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
print(result)
# Expected output: "Yo yera rubín" or "Yo yera roxu"

Downloads last month: 2

Safetensors

Model size

445M params

Tensor type

F32

F16

Inference Providers NEW

This model isn't deployed by any Inference Provider. 🙋 Ask for provider support

Model tree for meinvirgos/aina-translator-es-ast-quantized

Base model

facebook/nllb-200-distilled-600M

Quantized

(5)

this model