# Gradio web interface for parsing uploaded PDF catalogs.
import os
import gradio as gr
import main
import shutil


def predict_from_pdf(pdf_file):
    """Copy an uploaded PDF into the catalogue folder and parse it.

    Args:
        pdf_file: The value received from the ``gr.File`` input. Either a
            filesystem path (``str`` / ``os.PathLike``) or an object whose
            ``name`` attribute holds the path of the uploaded file.
            ``None`` means nothing was uploaded.

    Returns:
        The ``(df, response)`` pair returned by ``main.process_pdf_catalog``.
        On any failure the first element is ``None`` and the second is a
        human-readable error message.
    """
    # Submitting the form without a file hands over None; report it instead
    # of crashing on attribute access.
    if pdf_file is None:
        return None, "Error: No PDF file was provided."

    # Persistent working folder for uploads (it is not cleaned up afterwards).
    upload_dir = "./catalogue/"
    os.makedirs(upload_dir, exist_ok=True)

    try:
        # The input may be a path or a file-like wrapper exposing the path as
        # ``.name``. Path-like inputs are checked first because
        # ``pathlib.Path.name`` is only the basename, not the full path.
        if isinstance(pdf_file, (str, os.PathLike)):
            source_path = os.fspath(pdf_file)
        else:
            source_path = pdf_file.name

        dest_file_path = os.path.join(upload_dir, os.path.basename(source_path))

        try:
            shutil.copy(source_path, dest_file_path)
        except shutil.SameFileError:
            # The file already lives in the catalogue folder (e.g. one of the
            # bundled examples); there is nothing to copy.
            pass

        # Check if the file was saved successfully
        if not os.path.exists(dest_file_path):
            return None, f"Error: The file {dest_file_path} could not be found or opened."

        # Process the PDF and retrieve the product info
        df, response = main.process_pdf_catalog(dest_file_path)
        return df, response

    except Exception as e:
        # UI boundary: surface every failure as a message rather than a traceback.
        return None, f"Error processing PDF: {e}"


# Sample catalogs offered in the UI; each row supplies the single file input.
pdf_examples = [["catalogue/flexpocket.pdf"], ["catalogue/ASICS_Catalog.pdf"]]

# Interface wiring: one file upload in, the callback's two results out,
# rendered as JSON and plain text.
demo = gr.Interface(
    title="Open Source PDF Catalog Parser",
    description="Efficient PDF catalog processing using fitz and OpenLLM",
    article="Uses PyMuPDF for layout analysis and Llama-CPP for structured extraction",
    fn=predict_from_pdf,
    inputs=gr.File(label="Upload PDF Catalog"),
    outputs=["json", "text"],
    examples=pdf_examples,
)

if __name__ == "__main__":
    # Turn on the request queue, then serve on every network interface at
    # port 7860 and ask Gradio for a public share link.
    queued_demo = demo.queue()
    queued_demo.launch(share=True, server_port=7860, server_name="0.0.0.0")