from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import gradio as gr

# Load Llama 3.2 model
model_name = "meta-llama/Llama-3.2-3B-Instruct"  # Replace with the exact model path
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name, device_map="auto", torch_dtype=torch.float16
)

# Helper function to process long contexts
MAX_TOKENS = 100000  # Replace with the max token limit of the model (Llama 3.2 supports a 128K-token context window)
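
# A minimal sketch of the long-context helper introduced above. The name
# truncate_to_context() is hypothetical (not part of transformers); it clips
# the prompt to the MAX_TOKENS budget, keeping the most recent tokens, which
# are usually the most relevant in a chat setting.
def truncate_to_context(text: str, max_tokens: int = MAX_TOKENS) -> str:
    token_ids = tokenizer.encode(text)
    if len(token_ids) <= max_tokens:
        return text  # Already fits within the context window
    # Drop tokens from the front so the tail of the input survives
    return tokenizer.decode(token_ids[-max_tokens:], skip_special_tokens=True)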