Spaces:
Runtime error
Runtime error
import gradio as gr | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
import torch | |
# Load the model and tokenizer once, at import time, so every request
# reuses the same weights.
model_name = "jinaai/reader-lm-1.5b"
# Use the GPU when one is available; otherwise fall back to the CPU instead
# of crashing on startup with a hard-coded "cuda" device.
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.bfloat16
).to(device)
# Conversion function.
def html_to_markdown(html_content):
    """Convert HTML text to Markdown using the loaded causal language model.

    Args:
        html_content: Raw HTML text to convert.

    Returns:
        The Markdown text generated by the model, without the prompt echoed
        back. Empty, whitespace-only, or ``None`` input yields an empty
        string without invoking the model.
    """
    # Nothing to convert: skip tokenization and generation entirely.
    if not html_content or not html_content.strip():
        return ""

    messages = [{"role": "user", "content": html_content}]
    # add_generation_prompt=True ends the prompt with the assistant header,
    # so the model's continuation is the answer itself rather than a header
    # followed by the answer.
    input_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # Follow the model's own device rather than assuming a GPU is present.
    inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)

    # Greedy decoding (do_sample=False): temperature has no effect in this
    # mode, so it is not passed.
    outputs = model.generate(
        inputs,
        max_new_tokens=1024,
        do_sample=False,
        repetition_penalty=1.08,
    )

    # The returned sequence starts with the prompt tokens; keep only the
    # newly generated ones so the input HTML is not echoed in the result.
    new_tokens = outputs[0][inputs.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)
# Build the Gradio interface: one multi-line text box for the HTML input and
# one text box for the Markdown output.
# The components are referenced as gr.Textbox because the legacy
# gr.inputs / gr.outputs namespaces no longer exist in current Gradio
# releases.
iface = gr.Interface(
    fn=html_to_markdown,
    inputs=gr.Textbox(lines=20, label="HTML 内容"),
    outputs=gr.Textbox(label="Markdown 内容"),
    title="HTML 转 Markdown 转换器",
    description="输入 HTML 内容,模型将其转换为 Markdown 格式。",
)
if __name__ == "__main__":
    # Start the web UI only when this file is run as a script.
    iface.launch()