File size: 4,914 Bytes
e9d377a
1c01735
 
 
 
2ddf5db
 
 
 
e9d377a
1c01735
 
 
e9d377a
1c01735
 
5497d2c
1c01735
e9d377a
1c01735
e9d377a
1c01735
 
 
 
 
 
 
 
e76988b
1c01735
17b5964
 
 
 
1c01735
17b5964
1c01735
 
e76988b
1c01735
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e9d377a
1c01735
 
35a83ee
 
1c01735
 
28ec659
9342c0c
 
 
35a83ee
e4d16b6
38a49e3
1c01735
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38a49e3
fab23fe
 
 
 
 
 
c5d1b61
 
 
 
 
ec09a0a
 
 
c5d1b61
 
 
 
 
 
 
 
e9d377a
ec09a0a
9daafcc
acb2de2
9daafcc
8d04284
38a49e3
35a83ee
9342c0c
1c01735
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
import json
import logging
import os
import time

import requests
import supabase
from flask import Flask, request, jsonify

# Configure logging
logging.basicConfig(level=logging.INFO)

# SECURITY(review): the literal credentials below are committed to source and
# should be treated as exposed -- rotate them and provide the real values through
# the environment variables read here. The literals are kept only as fallbacks so
# that existing deployments keep starting until the variables are configured.

# Azure Document Intelligence setup
AZURE_ENDPOINT = os.environ.get("AZURE_ENDPOINT") or "https://gosignpdf.cognitiveservices.azure.com/"
AZURE_KEY = os.environ.get("AZURE_KEY") or "2nUifMPmbS35qkiFr5OjgzDw7ooE5Piw5892GQgyWZHe0oNRIBJHJQQJ99AKACfhMk5XJ3w3AAALACOGkANC"

# Supabase setup
SUPABASE_URL = os.environ.get("SUPABASE_URL") or "https://dtzuqtvroalrjhgdcowq.supabase.co/"
SUPABASE_KEY = os.environ.get("SUPABASE_KEY") or "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImR0enVxdHZyb2FscmpoZ2Rjb3dxIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImlhdCI6MTcyNTQ0OTczMiwiZXhwIjoyMDQxMDI1NzMyfQ.bMjGnnuYNlSEyaSWLNf_aOOebvDhFirPDr6zXjMHs64"
supabase_client = supabase.create_client(SUPABASE_URL, SUPABASE_KEY)

app = Flask(__name__)

def log_debug(message, **kwargs):
    """Print a ``[DEBUG]`` line for *message*, then one indented line per keyword detail."""
    print(f"[DEBUG] {message}")
    # Iterating an empty mapping is a no-op, so no explicit emptiness check is needed.
    for label, detail in kwargs.items():
        print(f"  - {label}: {detail}")

def download_file_from_supabase(file_path):
    """Download a file from the Supabase ``files`` storage bucket.

    Args:
        file_path: Path of the object inside the ``files`` bucket.

    Returns:
        The file content as ``bytes``.

    Raises:
        Exception: If the storage client returns anything other than ``bytes``.
    """
    response = supabase_client.storage.from_("files").download(file_path)
    if isinstance(response, bytes):
        return response

    # A non-bytes result is not guaranteed to expose ``.text``; fall back to its
    # repr so the real failure is reported instead of an AttributeError.
    detail = getattr(response, "text", None)
    if detail is None:
        detail = repr(response)
    raise Exception(f"Failed to download file from Supabase: {detail}")


def analyze_pdf_layout(file_content, *, poll_interval=8, max_wait=300, request_timeout=60):
    """Send a PDF to the Azure ``prebuilt-layout`` model and return its layout data.

    The document is submitted with a POST, then the URL from the
    ``Operation-Location`` response header is polled until the analysis reports
    ``succeeded`` or ``failed``, or until ``max_wait`` seconds have elapsed.

    Args:
        file_content: Raw PDF bytes used as the request body.
        poll_interval: Seconds to sleep between polls of the operation URL.
        max_wait: Maximum number of seconds to keep polling before giving up.
        request_timeout: Timeout in seconds applied to each HTTP request.

    Returns:
        The ``analyzeResult`` object from the final polling response.

    Raises:
        Exception: If the submission is not accepted (status other than 202), the
            ``Operation-Location`` header is missing, the analysis fails, or the
            analysis does not finish within ``max_wait`` seconds.
    """
    # AZURE_ENDPOINT may end with "/"; strip it so the path does not start with "//".
    base_url = AZURE_ENDPOINT.rstrip("/")
    url = f"{base_url}/formrecognizer/documentModels/prebuilt-layout:analyze?api-version=2023-07-31"
    auth_headers = {"Ocp-Apim-Subscription-Key": AZURE_KEY}

    response = requests.post(
        url,
        headers={**auth_headers, "Content-Type": "application/pdf"},
        data=file_content,
        timeout=request_timeout,
    )
    if response.status_code != 202:
        raise Exception(f"Azure request failed: {response.text}")

    operation_location = response.headers.get("Operation-Location")
    if not operation_location:
        raise Exception("Operation-Location header not found in response.")

    # Bound the polling so a stuck operation cannot block the caller forever.
    deadline = time.monotonic() + max_wait
    while True:
        result_response = requests.get(
            operation_location,
            headers=auth_headers,
            timeout=request_timeout,
        )
        try:
            result = result_response.json()
        except ValueError:
            # Non-JSON body (e.g. a transient gateway error): treat as "not done yet".
            result = {}
        if not isinstance(result, dict):
            result = {}

        status = result.get("status")
        if status == "succeeded":
            return result["analyzeResult"]
        if status == "failed":
            raise Exception("Analysis failed.")
        if time.monotonic() >= deadline:
            raise Exception(f"Analysis did not complete within {max_wait} seconds.")
        time.sleep(poll_interval)

@app.route("/analyze", methods=["POST"])
def analyze():
    """Analyze a stored PDF's layout and save the result on its ``files`` row.

    Expects a JSON body containing ``file_id``. The file's ``file_path`` is looked
    up in the ``files`` table, the file is downloaded and analyzed, and the first
    page's height and width plus the first word's first polygon value (``x1``) and
    the last word's last polygon value (``y4``) are written back to the row.

    Returns:
        A ``(json, status)`` pair: 200 with the layout values on success, 400 when
        ``file_id`` is missing, 404 when the file row is not found, 422 when the
        analysis contains no pages or no words, and 500 on any other failure.
    """
    try:
        # Get file ID from request. ``silent=True`` yields None for a missing or
        # malformed JSON body, which is reported as a 400 instead of a 500.
        payload = request.get_json(silent=True)
        file_id = payload.get("file_id") if isinstance(payload, dict) else None
        if not file_id:
            return jsonify({"error": "File ID is required"}), 400

        # Fetch file path from Supabase
        file_data = supabase_client.table("files").select("file_path").eq("id", file_id).single().execute()

        # Check if file_data is not None and contains valid data
        if not file_data or not file_data.data:
            return jsonify({"error": "File not found or Supabase query failed"}), 404

        file_path = file_data.data["file_path"]
        # Download the file from Supabase
        file_content = download_file_from_supabase(file_path)

        # Analyze the PDF layout with Azure
        layout_data = analyze_pdf_layout(file_content)

        # Only the first page is used (single-page PDFs are assumed).
        pages = layout_data.get("pages") or []
        if not pages:
            return jsonify({"error": "No pages found in the analyzed document"}), 422
        page_data = pages[0]

        words = page_data.get("words") or []
        if not words:
            return jsonify({"error": "No words found on the first page"}), 422
        first_word = words[0]
        last_word = words[-1]

        page_height = page_data["height"]
        page_width = page_data["width"]
        x1 = first_word["polygon"][0]  # X1 of first word
        y4 = last_word["polygon"][-1]  # Y4 of last word

        # Persist the extracted layout values on the file's row.
        update_response = supabase_client.table("files").update({
            "page_height": page_height,
            "page_width": page_width,
            "x1": x1,
            "y4": y4,
        }).eq("id", file_id).execute()

        # Report an explicit error first, since it carries more detail than "no data".
        update_error = getattr(update_response, "error", None)
        if update_error:
            return jsonify({"error": "Failed to update file layout data", "details": str(update_error)}), 500

        # An update that returns no rows is treated as a failure.
        if not update_response.data:
            return jsonify({"error": "Failed to update file layout data"}), 500

        return jsonify({
            "message": "Layout data analyzed successfully",
            "page_height": page_height,
            "page_width": page_width,
            "x1": x1,
            "y4": y4,
        }), 200

    except Exception as e:
        # Top-level boundary for the route: log with traceback and answer with a 500.
        logging.exception("Error while analyzing file")
        return jsonify({"error": str(e)}), 500




# Script entry point: serve the app on all interfaces, port 8000, when run directly.
if __name__ == "__main__":
    app.run(port=8000, host="0.0.0.0")