import gradio as gr

# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("Tianlin668/MentalBART")
model = AutoModelForSeq2SeqLM.from_pretrained("Tianlin668/MentalBART")


# Define the inference function
def inference(prompt):
    # Encode the input prompt
    input_ids = tokenizer.encode(prompt, return_tensors="pt")
    # Generate a response
    # You can adjust max_length and num_beams according to your needs
    output = model.generate(input_ids, max_length=150, num_beams=5, early_stopping=True)
    # Decode the generated output and return the model's response
    result = tokenizer.decode(output[0], skip_special_tokens=True)
    return result


with gr.Blocks() as demo:
    # The original snippet was truncated inside this gr.Markdown call; the
    # heading text and the UI wiring below are an assumed, minimal completion.
    gr.Markdown("# MentalBART demo")
    prompt_box = gr.Textbox(label="Prompt", lines=4)
    output_box = gr.Textbox(label="Response", lines=6)
    generate_btn = gr.Button("Generate")
    # Run inference on the prompt and display the decoded text in the output box
    generate_btn.click(fn=inference, inputs=prompt_box, outputs=output_box)

demo.launch()
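# Usage note (a sketch, assuming `gradio`, `transformers`, and `torch` are
# installed): save this file and run it with `python <filename>.py`. Gradio
# prints a local URL (http://127.0.0.1:7860 by default) where the demo is
# served; enter a prompt and click "Generate" to see the model's response.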