# NOTE: extracted from a hosted "Spaces" file viewer (file size: 4,914 bytes); the viewer's status text, commit-hash column and line-number gutter that preceded the code were extraction residue, not part of the program.
import json
import logging
import os
import time

import requests
import supabase
from flask import Flask, request, jsonify
# Configure logging
logging.basicConfig(level=logging.INFO)


def _require_env(name):
    """Return the value of environment variable *name*, failing fast if unset."""
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(f"Missing required environment variable: {name}")
    return value


# Azure Document Intelligence setup.
# SECURITY: API keys must not live in source control. They are read from the
# environment instead; any key that was previously committed here should be
# considered leaked and rotated.
AZURE_ENDPOINT = os.environ.get(
    "AZURE_ENDPOINT", "https://gosignpdf.cognitiveservices.azure.com/"
)
AZURE_KEY = _require_env("AZURE_KEY")

# Supabase setup (same rule: the key comes from the environment).
SUPABASE_URL = os.environ.get(
    "SUPABASE_URL", "https://dtzuqtvroalrjhgdcowq.supabase.co/"
)
SUPABASE_KEY = _require_env("SUPABASE_KEY")
supabase_client = supabase.create_client(SUPABASE_URL, SUPABASE_KEY)

app = Flask(__name__)
def log_debug(message, **kwargs):
    """Print a ``[DEBUG]`` line for *message*, then one line per keyword detail.

    Each keyword argument is written on its own line as `` - key: value``,
    in the order the arguments were passed.
    """
    detail_lines = (f" - {name}: {detail}" for name, detail in kwargs.items())
    # A single print with a newline separator yields the same output as
    # printing the header and each detail line individually.
    print(f"[DEBUG] {message}", *detail_lines, sep="\n")
def download_file_from_supabase(file_path):
    """Download a file from the Supabase ``files`` storage bucket.

    Args:
        file_path: Path of the object inside the ``files`` bucket.

    Returns:
        The file content as ``bytes``.

    Raises:
        Exception: If the storage client returns anything other than bytes.
    """
    response = supabase_client.storage.from_("files").download(file_path)
    # A successful download is the raw file content, so there is no status
    # code to inspect; the type of the result is the success signal.
    if isinstance(response, bytes):
        return response
    # Not every non-bytes result is guaranteed to expose ``.text``; fall back
    # to its repr so the real failure is reported instead of an AttributeError.
    details = getattr(response, "text", repr(response))
    raise Exception(f"Failed to download file from Supabase: {details}")
def analyze_pdf_layout(file_content, poll_interval=8, max_wait=300, request_timeout=30):
    """Send a PDF to Azure and return its layout analysis.

    Posts the bytes to the ``prebuilt-layout`` model, then polls the URL from
    the ``Operation-Location`` response header until the reported status is
    ``succeeded`` or ``failed``.

    Args:
        file_content: Raw PDF bytes to analyze.
        poll_interval: Seconds to sleep between status polls.
        max_wait: Maximum total seconds to keep polling before giving up.
        request_timeout: Timeout in seconds applied to each HTTP request.

    Returns:
        The ``analyzeResult`` value of the final status response.

    Raises:
        Exception: If the submission is not answered with HTTP 202, the
            ``Operation-Location`` header is missing, the analysis reports
            ``failed``, or it does not finish within ``max_wait`` seconds.
    """
    # The configured endpoint may end with "/"; strip it so the joined URL
    # does not contain a double slash before "formrecognizer".
    base_url = AZURE_ENDPOINT.rstrip("/")
    url = f"{base_url}/formrecognizer/documentModels/prebuilt-layout:analyze?api-version=2023-07-31"
    headers = {
        "Ocp-Apim-Subscription-Key": AZURE_KEY,
        "Content-Type": "application/pdf",
    }
    response = requests.post(
        url, headers=headers, data=file_content, timeout=request_timeout
    )
    if response.status_code != 202:
        raise Exception(f"Azure request failed: {response.text}")

    operation_location = response.headers.get("Operation-Location")
    if not operation_location:
        raise Exception("Operation-Location header not found in response.")

    poll_headers = {"Ocp-Apim-Subscription-Key": AZURE_KEY}
    # Bound the polling loop so a stuck operation cannot block the caller
    # (and the web worker serving it) forever.
    deadline = time.monotonic() + max_wait
    while True:
        result_response = requests.get(
            operation_location, headers=poll_headers, timeout=request_timeout
        )
        result = result_response.json()
        status = result.get("status")
        if status == "succeeded":
            return result["analyzeResult"]
        if status == "failed":
            raise Exception("Analysis failed.")
        if time.monotonic() >= deadline:
            raise Exception(
                f"Analysis did not complete within {max_wait} seconds."
            )
        time.sleep(poll_interval)
@app.route("/analyze", methods=["POST"])
def analyze():
    """Analyze the layout of a stored PDF and persist the result.

    Expects a JSON body with a ``file_id``. Looks up the file's ``file_path``
    in the ``files`` table, downloads it, runs the Azure layout analysis, and
    writes ``page_height``, ``page_width``, ``x1`` and ``y4`` back to the row.

    Returns:
        A ``(json, status)`` pair: 200 with the layout values on success,
        400 when ``file_id`` is missing, 404 when the file row is not found,
        422 when the analysis contains no pages or no words, and 500 for any
        other failure.
    """
    try:
        # silent=True yields None instead of raising on a missing or
        # non-JSON body, so such requests get the 400 below, not a 500.
        payload = request.get_json(silent=True)
        if not isinstance(payload, dict):
            payload = {}
        file_id = payload.get("file_id")
        if not file_id:
            return jsonify({"error": "File ID is required"}), 400

        # Fetch file path from Supabase
        file_data = (
            supabase_client.table("files")
            .select("file_path")
            .eq("id", file_id)
            .single()
            .execute()
        )
        if not file_data or not file_data.data:
            return jsonify({"error": "File not found or Supabase query failed"}), 404
        file_path = file_data.data["file_path"]

        # Download the file from Supabase and analyze its layout with Azure
        file_content = download_file_from_supabase(file_path)
        layout_data = analyze_pdf_layout(file_content)

        # Only the first page is used (single-page PDFs are assumed).
        pages = layout_data.get("pages") or []
        if not pages:
            return jsonify({"error": "No pages found in analysis result"}), 422
        page_data = pages[0]

        words = page_data.get("words") or []
        if not words:
            return jsonify({"error": "No words found on the first page"}), 422
        first_word = words[0]
        last_word = words[-1]

        page_height = page_data["height"]
        page_width = page_data["width"]
        # NOTE(review): presumably each polygon is a flat coordinate list
        # [x1, y1, ..., x4, y4], making these the first word's X1 and the
        # last word's Y4 - confirm against the Azure layout response schema.
        x1 = first_word["polygon"][0]
        y4 = last_word["polygon"][-1]

        # Persist the extracted layout values on the file's row.
        update_response = supabase_client.table("files").update({
            "page_height": page_height,
            "page_width": page_width,
            "x1": x1,
            "y4": y4,
        }).eq("id", file_id).execute()

        # Report an explicit error first, since it carries more detail than
        # an empty result; str() keeps the payload JSON-serializable.
        update_error = getattr(update_response, "error", None)
        if update_error:
            return jsonify({
                "error": "Failed to update file layout data",
                "details": str(update_error),
            }), 500
        if not update_response.data:
            return jsonify({"error": "Failed to update file layout data"}), 500

        return jsonify({
            "message": "Layout data analyzed successfully",
            "page_height": page_height,
            "page_width": page_width,
            "x1": x1,
            "y4": y4,
        }), 200
    except Exception as e:
        # Top-level boundary for the route: log with traceback and return
        # the error as JSON rather than letting the request crash.
        logging.exception("Error: %s", e)
        return jsonify({"error": str(e)}), 500
# Start the Flask server on every network interface (port 8000), but only
# when this file is executed directly rather than imported.
if __name__ == "__main__":
    app.run(port=8000, host="0.0.0.0")
|