---
license: cc-by-sa-4.0
language:
- tig
base_model:
- meta-llama/Llama-3.2-1B
---
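
The quick-start snippet below loads the model with Transformers and generates text in Tigre, Arabic, and English. The Tigre and English prompts are prefixed with language tags ([tig_Ethi], [eng_Latn]).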
|
```python
import logging

from transformers import AutoModelForCausalLM, AutoTokenizer

tig_model_path = "BeitTigreAI/tigre-llm-Llama3.2-1B"

# Suppress transformers warnings for cleaner output
logging.getLogger("transformers").setLevel(logging.ERROR)

# Load the tokenizer and model from the specified path.
# device_map="auto" already places the model on a GPU when one is
# available, so no manual model.to(device) call is needed (moving a
# model that accelerate has dispatched can raise an error).
tokenizer = AutoTokenizer.from_pretrained(tig_model_path)
model = AutoModelForCausalLM.from_pretrained(tig_model_path, device_map="auto")

# Example 1: Generate text in Tigre (written in Ethiopic script),
# prefixed with the [tig_Ethi] language tag
prompt = "[tig_Ethi]መርሐበ ብኩም"  # a Tigre greeting ("welcome")
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=50)
print("Tigre Output:")
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))

# Example 2: Generate text in Arabic
# ("What distinguishes the Tigre language?")
prompt = "ما الذي يميز لغة التغري؟"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=40)
print("\nArabic Output:")
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))

# Example 3: Generate text in English, prefixed with the [eng_Latn] tag
prompt = "[eng_Latn] What is interesting about the Tigre language?"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=40)
print("\nEnglish Output:")
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True))
```
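
The same checkpoint can also be driven through the higher-level `pipeline` API, which bundles the tokenizer and model loading shown above. A minimal sketch, assuming the same language-tag convention as the examples:

```python
from transformers import pipeline

# Minimal sketch: the quick-start example above, via the pipeline API
generator = pipeline(
    "text-generation",
    model="BeitTigreAI/tigre-llm-Llama3.2-1B",
    device_map="auto",
)

# The [tig_Ethi] prefix follows the tag convention shown above
result = generator("[tig_Ethi]መርሐበ ብኩም", max_new_tokens=50)
print(result[0]["generated_text"])
```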