Spaces:

Alteredverse
/

open-catalog-parser

Build error

File size: 1,464 Bytes

43355d2
 
f8daace
17345fb
43355d2
4bc3210
f8daace
 
 
 
4bc3210
f8daace
 
17345fb
d27c873
f8daace
 
17345fb
f8daace
 
 
d27c873
f8daace
 
 
d27c873
f8daace

import os
import gradio as gr
import main
import shutil


def predict_from_pdf(pdf_file):
    """Copy an uploaded PDF into ``./catalogue/`` and run the catalog parser on it.

    Args:
        pdf_file: The value delivered by the ``gr.File`` input. Accepted forms
            are a filesystem path (``str`` or ``os.PathLike``) or an object
            exposing the path through a ``name`` attribute (tempfile-style
            wrapper). ``None`` means nothing was uploaded.

    Returns:
        A 2-tuple. On success it is the ``(df, response)`` pair returned by
        ``main.process_pdf_catalog``. On any failure it is ``(None, message)``
        where ``message`` is a human-readable error string.
    """
    if pdf_file is None:
        return None, "Error: No PDF file was provided."

    # The upload may arrive as a plain path or as a file wrapper, depending on
    # how the File component is configured; resolve both to one source path
    # instead of mixing `pdf_file.name` and `pdf_file` as the original did.
    if isinstance(pdf_file, (str, os.PathLike)):
        source_path = os.fspath(pdf_file)
    else:
        source_path = getattr(pdf_file, "name", None)

    if not isinstance(source_path, str) or not source_path:
        return None, "Error: Could not determine the path of the uploaded file."

    # Persistent working directory for uploaded catalogs (not a temp dir).
    upload_dir = "./catalogue/"
    os.makedirs(upload_dir, exist_ok=True)

    dest_file_path = os.path.join(upload_dir, os.path.basename(source_path))

    try:
        try:
            shutil.copy(source_path, dest_file_path)
        except shutil.SameFileError:
            # The source already lives in upload_dir (e.g. one of the bundled
            # example PDFs); there is nothing to copy, so process it in place.
            pass

        # Check if the file was saved successfully
        if not os.path.exists(dest_file_path):
            return None, f"Error: The file {dest_file_path} could not be found or opened."

        # Process the PDF and retrieve the product info
        df, response = main.process_pdf_catalog(dest_file_path)
        return df, response

    except Exception as e:
        # UI boundary: surface the failure as text instead of crashing the app.
        return None, f"Error processing PDF: {str(e)}"


# Sample catalogs offered in the UI; each entry is a one-element row holding
# the path passed to the single file input.
pdf_examples = [
    [f"catalogue/{example_name}"]
    for example_name in ("flexpocket.pdf", "ASICS_Catalog.pdf")
]

# Gradio UI wiring: a single file-upload input handled by predict_from_pdf,
# whose two return values are rendered as a JSON output and a text output.
demo = gr.Interface(
    title="Open Source PDF Catalog Parser",
    description="Efficient PDF catalog processing using fitz and OpenLLM",
    article="Uses PyMuPDF for layout analysis and Llama-CPP for structured extraction",
    fn=predict_from_pdf,
    inputs=gr.File(label="Upload PDF Catalog"),
    outputs=["json", "text"],
    examples=pdf_examples,
)

if __name__ == "__main__":
    # Turn on request queuing, then serve on all interfaces at port 7860 with
    # a share link requested.
    queued_app = demo.queue()
    queued_app.launch(share=True, server_port=7860, server_name="0.0.0.0")