Install the required packages: peft, transformers, bitsandbytes, accelerate.


import transformers
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from torch import cuda, bfloat16

# Hub id of the base checkpoint that the adapter below is applied to.
base_model_id = 'meta-llama/Llama-2-7b-hf'

# Device string used for the adapter-wrapped model and for tokenized inputs:
# the currently selected CUDA device when CUDA is available, otherwise CPU.
if cuda.is_available():
    device = f'cuda:{cuda.current_device()}'
else:
    device = 'cpu'

# Quantization settings handed to from_pretrained: 4-bit loading with the
# 'nf4' quant type, double quantization enabled, bfloat16 compute dtype.
bnb_config = transformers.BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
    load_in_4bit=True,
)

# Placeholder access token; replace it with a real one before running.
# NOTE(review): newer transformers releases appear to prefer `token=` over
# `use_auth_token=` -- confirm against the installed version before changing.
hf_auth = "hf_your-huggingface-access-token"

model_config = transformers.AutoConfig.from_pretrained(
    base_model_id, use_auth_token=hf_auth)

# Load the quantized base model; device placement is left to device_map='auto'.
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    config=model_config,
    quantization_config=bnb_config,
    device_map='auto',
    trust_remote_code=True,
    use_auth_token=hf_auth,
)

# Fetch the adapter's configuration, then wrap the base model with the
# "Ashishkr/PII-Masking" adapter and move the result to `device`.
config = PeftConfig.from_pretrained("Ashishkr/PII-Masking")
model = PeftModel.from_pretrained(model, "Ashishkr/PII-Masking").to(device)

# Switch to evaluation mode before running generation.
model.eval()
print(f"Model loaded on {device}")

# The tokenizer comes from the base checkpoint, not from the adapter repo.
tokenizer = AutoTokenizer.from_pretrained(
    base_model_id, use_auth_token=hf_auth)
def remove_pii_info(
    model: AutoModelForCausalLM,
    tokenizer: AutoTokenizer,
    prompt: str,
    max_new_tokens: int = 128,
    temperature: float = 0.92) -> str:
    """Generate a continuation of *prompt* and return only the new text.

    The prompt is tokenized as a batch of one, moved to the module-level
    ``device``, and passed to ``model.generate`` inside a CUDA autocast
    context.

    Args:
        model: Model whose ``generate`` method produces the continuation.
        tokenizer: Tokenizer used to encode the prompt and decode the result.
        prompt: Full prompt text.
        max_new_tokens: Lower bound on the generation budget. The effective
            budget is the larger of this value and the prompt's token count,
            so the prompt-length budget still applies whenever it is larger.
        temperature: Forwarded unchanged to ``model.generate``.

    Returns:
        The decoded text of the tokens that follow the prompt in the
        generated sequence, with special tokens removed.
    """
    inputs = tokenizer(
        [prompt],
        return_tensors="pt",
        return_token_type_ids=False).to(device)

    prompt_token_count = inputs["input_ids"].shape[1]

    # Previously the argument was overwritten with the prompt length and
    # therefore ignored. Keep the prompt-length budget as a floor, but let a
    # larger caller-supplied value take effect.
    max_new_tokens = max(max_new_tokens, prompt_token_count)

    # Fall back to EOS when the tokenizer defines no pad token, rather than
    # handing ``None`` to generate().
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    # Check if bfloat16 is supported, otherwise use float16
    dtype_to_use = torch.bfloat16 if torch.cuda.is_bf16_supported() else torch.float16

    with torch.autocast("cuda", dtype=dtype_to_use):
        response = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=temperature,
            return_dict_in_generate=True,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=pad_token_id,
        )

    # Drop the prompt at the token level. Slicing the decoded string by
    # len(prompt) only works when decoding reproduces the prompt text
    # character for character, which tokenizers do not guarantee.
    # Assumes the returned sequence starts with the prompt tokens, as the
    # original character-based slice also did.
    generated_tokens = response["sequences"][0][prompt_token_count:]

    return tokenizer.decode(
        generated_tokens,
        skip_special_tokens=True,
    )

# Demo prompt: a passage containing several kinds of personal data, wrapped
# in the 'Input: "..." Output:' layout that is passed to the model.
# The e-mail address is a placeholder: the page this snippet was copied from
# had replaced the original address with an "[email protected]" marker,
# which left the example without any e-mail address to mask.
prompt = """
 Input: "John Doe, currently lives at 1234 Elm Street, Springfield, Anywhere 12345. 
 He can be reached at johndoe@example.com or at the phone number 555-123-4567. His social security number is 123-45-6789,
  and he has a bank account number 9876543210 at Springfield Bank. John attended Springfield University where he earned 
  a Bachelor's degree in Computer Science. He now works at Acme Corp and his employee ID is 123456. John's medical record number
   is MRN-001234, and he has a history of asthma and high blood pressure. His primary care physician is Dr. Jane Smith, 
   who practices at Springfield Medical Center. His recent blood test results show a cholesterol level of 200 mg/dL and a 
   blood glucose level of 90 mg/dL.
" Output:  """

# Run the prompt through the loaded model and tokenizer, overriding the
# default temperature, and print the text returned by remove_pii_info.
response = remove_pii_info(
    model,
    tokenizer,
    prompt,
    temperature=0.7)

print(response)
Downloads last month
6
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.

Model tree for Ashishkr/llama2-PII-Masking

Adapter
(1771)
this model