import io
import os
from typing import List

import fitz
import gradio as gr
import pdfplumber
from docx import Document
from loadimg import load_img
from markitdown import MarkItDown
from pdfitdown.pdfconversion import Converter
from PIL import Image
from pptx import Presentation

# Shared converter instances, created once at import time and reused by
# every request handler below.
converter = Converter()
md = MarkItDown()


def _upload_path(upload) -> str:
    """Return the filesystem path behind an uploaded-file argument.

    Accepts a plain path string, an ``os.PathLike``, or any object that
    exposes its path through a ``.name`` attribute (the form the handlers
    originally relied on).

    Raises:
        gr.Error: If ``upload`` is ``None`` (nothing was uploaded).
    """
    if upload is None:
        raise gr.Error("Please upload a file first.")
    if isinstance(upload, (str, os.PathLike)):
        # Checked before ``.name`` because ``pathlib.Path.name`` is only the
        # final path component, not the full path.
        return os.fspath(upload)
    return upload.name


def _swap_extension(path: str, extension: str) -> str:
    """Return ``path`` with its final extension replaced by ``extension``.

    ``os.path.splitext`` only looks at the last path component, so a dot in
    a directory name is never mistaken for the file extension.
    """
    root, _ = os.path.splitext(path)
    return root + extension


def convert_file_to_pdf(filename: str) -> str:
    """
    Converts a file (for example a markdown file) to PDF format.

    Args:
        filename: The uploaded file to be converted.

    Returns:
        str: The file path of the generated PDF file.
    """
    source = _upload_path(filename)
    # NOTE(review): when the input already ends in ".pdf" the output path is
    # identical to the input path -- confirm the converter tolerates that.
    output_path = _swap_extension(source, ".pdf")
    converter.convert(source, output_path)
    return output_path


def convert_file_to_img(image_file: str = None, txt: str = "") -> List[Image.Image]:
    """
    Converts an uploaded file and/or an image reference to a list of images.

    Args:
        image_file: Optional uploaded file; it is converted to PDF and each page is rendered as an image.
        txt: Optional image reference (base64 string or URL) loaded directly as an image.

    Returns:
        List[Image.Image]: The image loaded from ``txt`` (if given) followed by one RGB image per rendered page of ``image_file`` (if given).
    """
    images = []

    # Truthiness check so that both "" and None mean "no text input".
    if txt:
        images.append(load_img(txt, output_type="pil"))

    if image_file is not None:
        source = _upload_path(image_file)
        pdf_path = _swap_extension(source, ".pdf")
        converter.convert(source, pdf_path)

        # The context manager closes the document even if rendering fails.
        with fitz.open(pdf_path) as doc:
            for page in doc:
                png_bytes = page.get_pixmap().tobytes("png")
                page_image = load_img(Image.open(io.BytesIO(png_bytes)))
                images.append(page_image.convert("RGB"))

    return images


def convert_file_to_markdown(filename: str) -> str:
    """
    Converts a file to markdown format using markitdown.

    Args:
        filename: The uploaded file to be converted.

    Returns:
        str: The markdown representation of the file.
    """
    return md.convert(_upload_path(filename)).text_content


def convert_pdf_to_word(filename: str) -> str:
    """
    Converts a PDF file to Word format.

    The extracted text of each page becomes one paragraph; pages without
    extractable text are skipped.

    Args:
        filename: The uploaded PDF file to be converted.

    Returns:
        str: The file path of the generated Word file.
    """
    source = _upload_path(filename)
    output_path = _swap_extension(source, ".docx")

    doc = Document()
    with pdfplumber.open(source) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if text:
                doc.add_paragraph(text)

    doc.save(output_path)
    return output_path


def convert_pdf_to_pptx(filename: str) -> str:
    """
    Converts a PDF file to PowerPoint (PPTX) format.

    One slide is created per PDF page; when the page has extractable text it
    is placed in a text box covering the whole slide.

    Args:
        filename: The uploaded PDF file to be converted.

    Returns:
        str: The file path of the generated PPTX file.
    """
    source = _upload_path(filename)
    output_path = _swap_extension(source, ".pptx")

    prs = Presentation()
    slide_layout = prs.slide_layouts[5]  # Title Only

    with pdfplumber.open(source) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            # A slide is added for every page, even when it has no text.
            slide = prs.slides.add_slide(slide_layout)
            if text:
                textbox = slide.shapes.add_textbox(
                    0, 0, prs.slide_width, prs.slide_height
                )
                textbox.text = text

    prs.save(output_path)
    return output_path


# Create individual interfaces
file_to_pdf = gr.Interface(
    fn=convert_file_to_pdf,
    inputs=gr.File(label="Upload README/Markdown file"),
    outputs=gr.File(label="Converted PDF"),
    title="File to PDF Converter",
    description="Convert your files to PDF format",
)

file_to_image = gr.Interface(
    fn=convert_file_to_img,
    inputs=[gr.File(label="Upload Image"), gr.Textbox(label="base64, url")],
    outputs=gr.Gallery(label="Converted Images"),
    title="File to Images Converter",
    description="Convert your images to an image format",
)

file_to_markdown = gr.Interface(
    fn=convert_file_to_markdown,
    inputs=gr.File(label="Upload File"),
    outputs=gr.Textbox(label="Converted Markdown"),
    title="File to Markdown Converter",
    description="Convert your files to markdown format",
)

pdf_to_word = gr.Interface(
    fn=convert_pdf_to_word,
    inputs=gr.File(label="Upload PDF file"),
    outputs=gr.File(label="Converted Word Document"),
    title="PDF to Word Converter",
    description="Convert your PDF files to Word format",
)

pdf_to_pptx = gr.Interface(
    fn=convert_pdf_to_pptx,
    inputs=gr.File(label="Upload PDF file"),
    outputs=gr.File(label="Converted PowerPoint Presentation"),
    title="PDF to PowerPoint Converter",
    description="Convert your PDF files to PowerPoint (PPTX) format",
)

# Create tabbed interface
demo = gr.TabbedInterface(
    [file_to_pdf, file_to_image, file_to_markdown, pdf_to_word, pdf_to_pptx],
    [
        "File to PDF",
        "File to Image",
        "File to Markdown",
        "PDF to Word",
        "PDF to PowerPoint",
    ],
)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860, debug=True, mcp_server=True)