# File size: 5,248 bytes
# Revisions referenced by the original listing: f89d964, 24b0376
import json
import logging
import os
import time

import requests
import supabase
from flask import Flask, request, jsonify

# Configure logging
logging.basicConfig(level=logging.INFO)

# SECURITY NOTE(review): the literal credentials below are committed to source
# control and must be treated as compromised -- rotate them and supply the
# real values through the environment. Each setting is read from an
# environment variable of the same name; the literals are kept only as
# fallbacks so existing deployments keep working until they are migrated.

# Azure Document Intelligence setup
AZURE_ENDPOINT = os.environ.get(
    "AZURE_ENDPOINT",
    "https://gosignpdf.cognitiveservices.azure.com/",
)
AZURE_KEY = os.environ.get(
    "AZURE_KEY",
    "2nUifMPmbS35qkiFr5OjgzDw7ooE5Piw5892GQgyWZHe0oNRIBJHJQQJ99AKACfhMk5XJ3w3AAALACOGkANC",
)

# Supabase setup
SUPABASE_URL = os.environ.get(
    "SUPABASE_URL",
    "https://dtzuqtvroalrjhgdcowq.supabase.co/",
)
SUPABASE_KEY = os.environ.get(
    "SUPABASE_KEY",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImR0enVxdHZyb2FscmpoZ2Rjb3dxIiwicm9sZSI6ImFub24iLCJpYXQiOjE3MjU0NDk3MzIsImV4cCI6MjA0MTAyNTczMn0.WrIvwEOq4CqCb8IkU8G4jiWkf9DM1JxGd2_aTN4vlV4",
)
# Shared Supabase client used for both table queries and storage downloads.
supabase_client = supabase.create_client(SUPABASE_URL, SUPABASE_KEY)

# Flask application object; routes are registered on it below.
app = Flask(__name__)

def log_debug(message, **kwargs):
    """Print a ``[DEBUG]`` line for *message*, then one line per keyword detail.

    Args:
        message: Text shown after the ``[DEBUG]`` prefix.
        **kwargs: Optional named values; each is printed on its own indented
            ``  - name: value`` line, in the order given.
    """
    print(f"[DEBUG] {message}")
    # Iterating an empty mapping is a no-op, so no explicit emptiness check
    # is needed before listing the details.
    for name in kwargs:
        print(f"  - {name}: {kwargs[name]}")

def download_file_from_supabase(file_path, bucket="your_bucket_name"):
    """Download a file from Supabase storage and return its raw content.

    Args:
        file_path: Path of the object inside the storage bucket.
        bucket: Name of the storage bucket. The default is the placeholder
            value the code has always used -- NOTE(review): replace it with
            the real bucket name (or pass it explicitly).

    Returns:
        The file content as bytes.

    Raises:
        Exception: If the storage client reports a non-200 response.
    """
    log_debug("Downloading file from Supabase", file_path=file_path, bucket=bucket)
    result = supabase_client.storage.from_(bucket).download(file_path)

    # NOTE(review): current supabase-py storage clients appear to return the
    # file body directly as bytes (raising on failure), while older ones
    # returned an HTTP response object. Support both shapes instead of
    # assuming `.status_code` exists -- confirm against the pinned version.
    if isinstance(result, (bytes, bytearray)):
        log_debug("Supabase download complete", size=len(result))
        return bytes(result)

    status_code = getattr(result, "status_code", None)
    # Deliberately do not log the response text: on success it is the whole
    # (binary) file body.
    log_debug("Supabase download response", status_code=status_code)
    if status_code != 200:
        detail = getattr(result, "text", "")
        raise Exception(f"Failed to download file from Supabase: {detail}")
    return result.content

def analyze_pdf_layout(file_content, poll_interval=8, max_wait=300, request_timeout=30):
    """Send a PDF to Azure Document Intelligence and return its layout data.

    Submits the document to the ``prebuilt-layout`` model, then polls the
    returned ``Operation-Location`` URL until the analysis finishes.

    Args:
        file_content: Raw PDF bytes to analyze.
        poll_interval: Seconds to wait between polling requests.
        max_wait: Maximum total seconds to wait for the analysis to finish
            before giving up (the polling loop used to be unbounded).
        request_timeout: Per-request timeout in seconds for every HTTP call.

    Returns:
        The ``analyzeResult`` object from the completed operation.

    Raises:
        Exception: If the submit request is rejected, the
            ``Operation-Location`` header is missing, a polling request
            fails with a non-retryable status, the analysis reports
            ``failed``, or ``max_wait`` elapses first.
    """
    log_debug("Sending PDF to Azure for analysis")
    # AZURE_ENDPOINT is configured with a trailing slash; strip it so the
    # joined URL does not contain a double slash.
    base_url = AZURE_ENDPOINT.rstrip("/")
    url = f"{base_url}/formrecognizer/documentModels/prebuilt-layout:analyze?api-version=2023-07-31"
    auth_headers = {"Ocp-Apim-Subscription-Key": AZURE_KEY}
    headers = {**auth_headers, "Content-Type": "application/pdf"}

    response = requests.post(url, headers=headers, data=file_content, timeout=request_timeout)
    log_debug("Azure response", status_code=response.status_code, headers=response.headers)
    if response.status_code != 202:
        raise Exception(f"Azure request failed: {response.text}")

    operation_location = response.headers.get("Operation-Location")
    log_debug("Azure operation location", operation_location=operation_location)
    if not operation_location:
        raise Exception("Operation-Location header not found in response.")

    # Statuses worth retrying: throttling and transient server-side errors.
    retryable_statuses = (429, 500, 502, 503, 504)
    deadline = time.monotonic() + max_wait

    while True:
        result_response = requests.get(
            operation_location, headers=auth_headers, timeout=request_timeout
        )

        if result_response.status_code == 200:
            result = result_response.json()
            status = result.get("status")
            log_debug("Azure polling result", status=status)

            if status == "succeeded":
                log_debug("Azure analysis succeeded")
                return result["analyzeResult"]
            if status == "failed":
                detail = result.get("error")
                raise Exception(f"Analysis failed: {detail}" if detail else "Analysis failed.")
        elif result_response.status_code in retryable_statuses:
            log_debug("Azure polling transient error", status_code=result_response.status_code)
        else:
            raise Exception(
                f"Azure polling request failed ({result_response.status_code}): "
                f"{result_response.text}"
            )

        # Bound the total wait so a stuck operation cannot hang the caller.
        if time.monotonic() >= deadline:
            raise Exception(f"Azure analysis did not finish within {max_wait} seconds.")
        time.sleep(poll_interval)

def _supabase_call_ok(response):
    """Return True unless *response* carries a non-2xx ``status_code``."""
    # NOTE(review): newer supabase-py responses may not expose `status_code`
    # at all (failures are raised instead); treat a missing attribute as
    # success rather than crashing with AttributeError.
    status = getattr(response, "status_code", None)
    return status is None or 200 <= status < 300


def _extract_layout_values(layout_data):
    """Return page size plus first-word X1 / last-word Y4 from layout data.

    Raises:
        ValueError: If the result has no pages or the first page has no words.
    """
    pages = layout_data.get("pages") or []
    if not pages:
        raise ValueError("No pages found in the analyzed document")

    page_data = pages[0]  # Assuming single-page PDF for simplicity
    words = page_data.get("words") or []
    if not words:
        raise ValueError("No words found on the first page of the document")

    return {
        "page_height": page_data["height"],
        "page_width": page_data["width"],
        "x1": words[0]["polygon"][0],  # X1 of first word
        "y4": words[-1]["polygon"][-1],  # Y4 of last word
    }


@app.route("/analyze", methods=["POST"])
def analyze():
    """Analyze the stored PDF named by ``file_id`` and persist its layout values.

    Expects a JSON body of the form ``{"file_id": ...}``. Looks up the file's
    ``filePath`` in the ``files`` table, downloads it, runs Azure layout
    analysis, and writes ``page_height``, ``page_width``, ``x1`` and ``y4``
    back to the same row.

    Returns:
        A ``(json, status)`` pair: 200 on success, 400 for a missing/invalid
        ``file_id``, 404 when the file row is not found, 422 when the
        document has no pages or words to measure, 500 for any other failure.
    """
    try:
        # Get file ID from request. `silent=True` yields None (instead of
        # raising) for a missing or malformed JSON body so we can answer 400.
        payload = request.get_json(silent=True)
        file_id = payload.get("file_id") if isinstance(payload, dict) else None
        log_debug("Received API request", file_id=file_id)
        if not file_id:
            return jsonify({"error": "File ID is required"}), 400

        # Fetch file path from Supabase
        file_data = supabase_client.table("files").select("filePath").eq("id", file_id).single().execute()
        log_debug(
            "Supabase file data response",
            status_code=getattr(file_data, "status_code", None),
            data=file_data.data,
        )
        if not _supabase_call_ok(file_data) or not file_data.data:
            return jsonify({"error": "File not found"}), 404

        file_path = file_data.data["filePath"]
        log_debug("File path retrieved from Supabase", file_path=file_path)

        # Download the file from Supabase
        file_content = download_file_from_supabase(file_path)

        # Analyze the PDF layout with Azure
        layout_data = analyze_pdf_layout(file_content)
        log_debug("Layout data retrieved", layout_data=layout_data)

        # Extract required layout values
        try:
            layout_values = _extract_layout_values(layout_data)
        except ValueError as e:
            log_debug("Layout extraction failed", error=str(e))
            return jsonify({"error": str(e)}), 422
        log_debug("Extracted layout values", **layout_values)

        # Update the `files` table in Supabase
        update_response = (
            supabase_client.table("files").update(layout_values).eq("id", file_id).execute()
        )
        log_debug(
            "Supabase update response",
            status_code=getattr(update_response, "status_code", None),
            data=update_response.data,
        )

        if not _supabase_call_ok(update_response):
            return jsonify({"error": "Failed to update file layout data"}), 500

        return jsonify({"message": "Layout data successfully updated"}), 200

    except Exception as e:
        # Top-level request boundary: report any unexpected failure as a 500.
        log_debug("Error occurred", error=str(e))
        return jsonify({"error": str(e)}), 500

if __name__ == "__main__":
    # Script entry point: start Flask's built-in server on port 8000, bound
    # to all network interfaces.
    app.run(port=8000, host="0.0.0.0")