import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Load the model and tokenizer.
# NOTE(review): assumes a CUDA device is available — .to("cuda") raises otherwise.
model_name = "jinaai/reader-lm-1.5b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.bfloat16
).to("cuda")


def html_to_markdown(html_content):
    """Convert raw HTML text to Markdown using the reader-lm model.

    Args:
        html_content: HTML source as a single string.

    Returns:
        The generated Markdown string (prompt tokens excluded).
    """
    messages = [{"role": "user", "content": html_content}]
    input_text = tokenizer.apply_chat_template(messages, tokenize=False)
    inputs = tokenizer.encode(input_text, return_tensors="pt").to("cuda")
    # Greedy decoding; temperature is irrelevant when do_sample=False,
    # so it is deliberately omitted to avoid a transformers warning.
    outputs = model.generate(
        inputs,
        max_new_tokens=1024,
        do_sample=False,
        repetition_penalty=1.08,
    )
    # Slice off the prompt so only newly generated tokens are decoded;
    # decoding outputs[0] whole would echo the input HTML back to the user.
    generated_tokens = outputs[0][inputs.shape[1]:]
    markdown_content = tokenizer.decode(generated_tokens, skip_special_tokens=True)
    return markdown_content


# Build the Gradio interface.
# gr.inputs / gr.outputs were removed in Gradio 3.x; use components directly.
iface = gr.Interface(
    fn=html_to_markdown,
    inputs=gr.Textbox(lines=20, label="HTML 内容"),
    outputs=gr.Textbox(label="Markdown 内容"),
    title="HTML 转 Markdown 转换器",
    description="输入 HTML 内容,模型将其转换为 Markdown 格式。",
)

if __name__ == "__main__":
    iface.launch()