from transformers import pipeline


class LLMHandler:
    """Thin wrapper around a Hugging Face text2text-generation pipeline."""

    def __init__(self):
        # Load FLAN-T5 small; device=-1 forces CPU inference.
        self.pipe = pipeline(
            "text2text-generation",
            model="google/flan-t5-small",
            device=-1,  # Force CPU
            torch_dtype="auto",
        )

    def generate_response(self, prompt, context):
        # Build a simple instruction-style prompt from the supplied context.
        full_prompt = f"Context: {context}\nQuestion: {prompt}\nAnswer:"
        # Sample a completion; max_length caps the total output length in tokens.
        return self.pipe(
            full_prompt,
            max_length=512,
            do_sample=True,
            temperature=0.7,
        )[0]['generated_text']
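
# A minimal usage sketch, assuming `transformers` and a PyTorch backend are
# installed; the context and question strings below are illustrative only.
if __name__ == "__main__":
    handler = LLMHandler()
    answer = handler.generate_response(
        prompt="Which model does the handler use?",
        context="The handler wraps google/flan-t5-small and runs on CPU.",
    )
    print(answer)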