# tools / app.py
# not-lain's picture
# add pptx api
# 5ba8cc5
import io
import os
from typing import List

import fitz
import gradio as gr
import pdfplumber
from docx import Document
from loadimg import load_img
from markitdown import MarkItDown
from pdfitdown.pdfconversion import Converter
from PIL import Image
from pptx import Presentation
# Shared pdfitdown Converter instance; the PDF and image conversion functions
# in this file call its ``convert(input_path, output_path)`` method.
converter = Converter()
# Shared MarkItDown instance used by convert_file_to_markdown.
md = MarkItDown()
def convert_file_to_pdf(filename: str) -> str:
    """
    Convert an uploaded file to PDF format.

    Args:
        filename: Path of the file to convert, or an upload object that
            exposes this path through a ``name`` attribute.

    Returns:
        str: The file path of the generated PDF file, which is the input
        path with its extension replaced by ``.pdf``.
    """
    # Accept a plain path string as well as an object carrying the path in
    # its ``name`` attribute.
    source_path = getattr(filename, "name", filename)
    # splitext only strips a real file extension, so a dot inside a directory
    # name cannot truncate the path of an extension-less file.
    output_path = os.path.splitext(source_path)[0] + ".pdf"
    converter.convert(source_path, output_path)
    return output_path
def convert_file_to_img(image_file: str = None, txt: str = "") -> List[Image.Image]:
    """
    Load images from an uploaded file and/or a text reference.

    Args:
        image_file: Optional path of the file to render, or an upload object
            that exposes this path through a ``name`` attribute. The file is
            first converted to PDF and every page of that PDF is then
            rendered to an RGB image.
        txt: Optional image reference passed to ``load_img`` (the UI labels
            this field "base64, url"). Empty or missing text is ignored.

    Returns:
        List[Image.Image]: The image loaded from ``txt`` (when given),
        followed by one image per rendered page of ``image_file`` (when
        given). The list is empty when neither input is provided.
    """
    img_list = []
    # Truthiness check so that both "" and None mean "no text input".
    if txt:
        img_list.append(load_img(txt, output_type="pil"))
    if image_file is not None:
        source_path = getattr(image_file, "name", image_file)
        # splitext only strips a real file extension, so a dot inside a
        # directory name cannot truncate the path.
        output_path = os.path.splitext(source_path)[0] + ".pdf"
        converter.convert(source_path, output_path)
        # The context manager closes the document even if rendering fails.
        with fitz.open(output_path) as doc:
            for page in doc:
                page_bytes = page.get_pixmap().tobytes("png")
                page_image = load_img(Image.open(io.BytesIO(page_bytes)))
                img_list.append(page_image.convert("RGB"))
    return img_list
def convert_file_to_markdown(filename: str) -> str:
    """
    Convert a file to markdown format using markitdown.

    Args:
        filename: Path of the file to convert, or an upload object that
            exposes this path through a ``name`` attribute.

    Returns:
        str: The markdown representation of the file.
    """
    # Accept a plain path string as well as an object carrying the path in
    # its ``name`` attribute.
    source_path = getattr(filename, "name", filename)
    return md.convert(source_path).text_content
def convert_pdf_to_word(filename: str) -> str:
    """
    Convert a PDF file to Word (DOCX) format.

    Only the extracted text is carried over. Each PDF page that yields text
    becomes one paragraph in the Word document.

    Args:
        filename: Path of the PDF file to convert, or an upload object that
            exposes this path through a ``name`` attribute.

    Returns:
        str: The file path of the generated Word file, which is the input
        path with its extension replaced by ``.docx``.
    """
    # Accept a plain path string as well as an object carrying the path in
    # its ``name`` attribute.
    source_path = getattr(filename, "name", filename)
    # splitext only strips a real file extension, so a dot inside a directory
    # name cannot truncate the path.
    output_path = os.path.splitext(source_path)[0] + ".docx"
    doc = Document()
    with pdfplumber.open(source_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            # Pages without extractable text are skipped.
            if text:
                doc.add_paragraph(text)
    doc.save(output_path)
    return output_path
def convert_pdf_to_pptx(filename: str) -> str:
    """
    Convert a PDF file to PowerPoint (PPTX) format.

    One slide is created per PDF page. When text can be extracted from a
    page, it is placed in a text box covering the whole slide; otherwise the
    slide is left without a text box.

    Args:
        filename: Path of the PDF file to convert, or an upload object that
            exposes this path through a ``name`` attribute.

    Returns:
        str: The file path of the generated PPTX file, which is the input
        path with its extension replaced by ``.pptx``.
    """
    # Accept a plain path string as well as an object carrying the path in
    # its ``name`` attribute.
    source_path = getattr(filename, "name", filename)
    # splitext only strips a real file extension, so a dot inside a directory
    # name cannot truncate the path.
    output_path = os.path.splitext(source_path)[0] + ".pptx"
    prs = Presentation()
    title_only_layout = prs.slide_layouts[5]  # Title Only
    with pdfplumber.open(source_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            # Every page gets a slide, even when no text could be extracted.
            slide = prs.slides.add_slide(title_only_layout)
            if text:
                textbox = slide.shapes.add_textbox(
                    0, 0, prs.slide_width, prs.slide_height
                )
                textbox.text = text
    prs.save(output_path)
    return output_path
# Individual interfaces, one per converter.
# File upload in, generated PDF file out (handled by convert_file_to_pdf).
file_to_pdf = gr.Interface(
    title="File to PDF Converter",
    description="Convert your files to PDF format",
    fn=convert_file_to_pdf,
    inputs=gr.File(label="Upload README/Markdown file"),
    outputs=gr.File(label="Converted PDF"),
)
# File upload and/or text reference in, image gallery out
# (handled by convert_file_to_img).
file_to_image = gr.Interface(
    title="File to Images Converter",
    description="Convert your images to an image format",
    fn=convert_file_to_img,
    inputs=[
        gr.File(label="Upload Image"),
        gr.Textbox(label="base64, url"),
    ],
    outputs=gr.Gallery(label="Converted Images"),
)
# File upload in, markdown text out (handled by convert_file_to_markdown).
file_to_markdown = gr.Interface(
    title="File to Markdown Converter",
    description="Convert your files to markdown format",
    fn=convert_file_to_markdown,
    inputs=gr.File(label="Upload File"),
    outputs=gr.Textbox(label="Converted Markdown"),
)
# PDF upload in, generated Word document out (handled by convert_pdf_to_word).
pdf_to_word = gr.Interface(
    title="PDF to Word Converter",
    description="Convert your PDF files to Word format",
    fn=convert_pdf_to_word,
    inputs=gr.File(label="Upload PDF file"),
    outputs=gr.File(label="Converted Word Document"),
)
# PDF upload in, generated PowerPoint file out (handled by convert_pdf_to_pptx).
pdf_to_pptx = gr.Interface(
    title="PDF to PowerPoint Converter",
    description="Convert your PDF files to PowerPoint (PPTX) format",
    fn=convert_pdf_to_pptx,
    inputs=gr.File(label="Upload PDF file"),
    outputs=gr.File(label="Converted PowerPoint Presentation"),
)
# Combine the individual interfaces into a single app with one tab each;
# tab names are listed in the same order as the interfaces.
demo = gr.TabbedInterface(
    interface_list=[
        file_to_pdf,
        file_to_image,
        file_to_markdown,
        pdf_to_word,
        pdf_to_pptx,
    ],
    tab_names=[
        "File to PDF",
        "File to Image",
        "File to Markdown",
        "PDF to Word",
        "PDF to PowerPoint",
    ],
)
if __name__ == "__main__":
    # Only start the server when this file is run as a script.
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        debug=True,
        mcp_server=True,
    )