from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import gradio as gr

# Load Llama 3.2 model
model_name = "meta-llama/Llama-3.2-3B-Instruct"  # Replace with the exact model path
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name, device_map="auto", torch_dtype=torch.float16
)

# Helper function to process long contexts
MAX_TOKENS = 100000  # Replace with the max token limit of the model (Llama 3.2 supports a 128K-token context window)
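
# A minimal sketch of the long-context helper introduced above. The name
# truncate_to_context() is hypothetical (not part of transformers); it clips
# the prompt to the MAX_TOKENS budget, keeping the most recent tokens, which
# are usually the most relevant in a chat setting.
def truncate_to_context(text: str, max_tokens: int = MAX_TOKENS) -> str:
    token_ids = tokenizer.encode(text)
    if len(token_ids) <= max_tokens:
        return text  # Already fits within the context window
    # Drop tokens from the front so the tail of the input survives
    return tokenizer.decode(token_ids[-max_tokens:], skip_special_tokens=True)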