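# NOTE: the lines below are page-header text captured together with the file;
# they are kept as comments so the module parses as Python.
# letterhead / app.py
# gosign's picture
# Update app.py
# 1c01735 verified
# raw
# history blame
# 5.24 kB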
import json
import logging
import os
import time

import requests
import supabase
from flask import Flask, request, jsonify
# Configure logging
logging.basicConfig(level=logging.INFO)


def _require_env(name):
    """Return the value of environment variable ``name`` or fail fast if it is unset."""
    value = os.environ.get(name)
    if not value:
        raise RuntimeError(f"Missing required environment variable: {name}")
    return value


# Azure Document Intelligence setup.
# The subscription key is a credential and must not live in source control, so
# it is read from the environment. Any key that was previously committed here
# should be rotated. The endpoint URL is not secret and keeps its old default.
AZURE_ENDPOINT = os.environ.get(
    "AZURE_ENDPOINT", "https://gosignpdf.cognitiveservices.azure.com/"
)
AZURE_KEY = _require_env("AZURE_KEY")

# Supabase setup (same policy: URL may default, the API key comes from the environment).
SUPABASE_URL = os.environ.get(
    "SUPABASE_URL", "https://dtzuqtvroalrjhgdcowq.supabase.co/"
)
SUPABASE_KEY = _require_env("SUPABASE_KEY")
supabase_client = supabase.create_client(SUPABASE_URL, SUPABASE_KEY)

app = Flask(__name__)
def log_debug(message, **kwargs):
    """Print a ``[DEBUG]`` line for ``message``, then one detail line per keyword argument.

    Args:
        message: Text printed after the ``[DEBUG]`` prefix.
        **kwargs: Extra context; each pair is printed as `` - name: value`` in
            the order it was passed.
    """
    print(f"[DEBUG] {message}")
    # Iterating an empty mapping prints nothing, so no emptiness check is needed.
    for field_name, field_value in kwargs.items():
        print(f" - {field_name}: {field_value}")
def download_file_from_supabase(file_path):
    """Download a file from the Supabase ``files`` storage bucket.

    Args:
        file_path: Object path inside the ``files`` bucket.

    Returns:
        The file content as bytes.

    Raises:
        Exception: If the client hands back a response-like object whose
            status code is not 200. Errors raised by the storage client itself
            propagate unchanged.
    """
    log_debug("Downloading file from Supabase", file_path=file_path)
    result = supabase_client.storage.from_("files").download(file_path)

    # The storage client returns the raw file bytes directly (failures are
    # raised as exceptions), so there is no status_code/text/content to read.
    if isinstance(result, (bytes, bytearray)):
        # Log only the size: the payload is a binary file, not readable text.
        log_debug("Supabase download response", size=len(result))
        return bytes(result)

    # Fallback for a client that returns an HTTP-response-like object instead.
    status_code = getattr(result, "status_code", None)
    log_debug("Supabase download response", status_code=status_code)
    if status_code != 200:
        detail = getattr(result, "text", result)
        raise Exception(f"Failed to download file from Supabase: {detail}")
    return result.content
def analyze_pdf_layout(file_content, poll_interval=8, max_wait=300, request_timeout=30):
    """Send a PDF to Azure Document Intelligence and return its layout analysis.

    Submits ``file_content`` to the ``prebuilt-layout`` model, then polls the
    URL given in the ``Operation-Location`` response header until the
    operation reports ``succeeded`` or ``failed``.

    Args:
        file_content: Raw PDF bytes, sent as the request body.
        poll_interval: Seconds to sleep between polling requests.
        max_wait: Maximum total seconds to keep polling before giving up.
        request_timeout: Timeout in seconds applied to each HTTP request.

    Returns:
        The ``analyzeResult`` value from the final polling response.

    Raises:
        TimeoutError: If the analysis has not finished within ``max_wait``.
        Exception: If the submission is not accepted (status other than 202),
            the ``Operation-Location`` header is missing, a polling request is
            rejected, or the service reports the analysis as failed.
    """
    log_debug("Sending PDF to Azure for analysis")
    # The configured endpoint ends with "/"; strip it so the joined URL does
    # not contain a double slash before "formrecognizer".
    base_url = AZURE_ENDPOINT.rstrip("/")
    url = f"{base_url}/formrecognizer/documentModels/prebuilt-layout:analyze?api-version=2023-07-31"
    headers = {
        "Ocp-Apim-Subscription-Key": AZURE_KEY,
        "Content-Type": "application/pdf",
    }
    response = requests.post(
        url, headers=headers, data=file_content, timeout=request_timeout
    )
    log_debug("Azure response", status_code=response.status_code, headers=response.headers)
    if response.status_code != 202:
        raise Exception(f"Azure request failed: {response.text}")

    operation_location = response.headers.get("Operation-Location")
    log_debug("Azure operation location", operation_location=operation_location)
    if not operation_location:
        raise Exception("Operation-Location header not found in response.")

    poll_headers = {"Ocp-Apim-Subscription-Key": AZURE_KEY}
    deadline = time.monotonic() + max_wait
    while True:
        result_response = requests.get(
            operation_location, headers=poll_headers, timeout=request_timeout
        )
        poll_status = result_response.status_code
        if poll_status == 429 or poll_status >= 500:
            # Throttling and server-side errors are treated as transient:
            # keep polling until the deadline.
            log_debug("Azure polling transient error", status_code=poll_status)
        elif poll_status != 200:
            # Any other non-200 answer will not fix itself; without this check
            # the loop would spin forever on an error body with no "status".
            raise Exception(
                f"Azure polling failed ({poll_status}): {result_response.text}"
            )
        else:
            result = result_response.json()
            status = result.get("status")
            log_debug("Azure polling result", status=status)
            if status == "succeeded":
                log_debug("Azure analysis succeeded")
                return result["analyzeResult"]
            if status == "failed":
                # NOTE(review): "error" is assumed to carry the failure
                # details in the operation body; it is None when absent.
                raise Exception(f"Analysis failed. Details: {result.get('error')}")

        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"Azure analysis did not finish within {max_wait} seconds."
            )
        time.sleep(poll_interval)
def _extract_layout_values(layout_data):
    """Pull the page size and text-extent coordinates from a layout result.

    Only the first page is inspected (the service assumes single-page PDFs).

    Args:
        layout_data: Mapping with a ``pages`` list; each page has ``height``,
            ``width`` and a ``words`` list whose items carry a flat ``polygon``
            coordinate list.

    Returns:
        Dict with ``page_height``, ``page_width``, ``x1`` (first coordinate of
        the first word's polygon) and ``y4`` (last coordinate of the last
        word's polygon).

    Raises:
        ValueError: If there are no pages, or the first page has no words.
    """
    pages = layout_data.get("pages") or []
    if not pages:
        raise ValueError("No pages found in the analysis result")
    page_data = pages[0]
    words = page_data.get("words") or []
    if not words:
        raise ValueError("No words found on the first page")
    return {
        "page_height": page_data["height"],
        "page_width": page_data["width"],
        "x1": words[0]["polygon"][0],  # X1 of first word
        "y4": words[-1]["polygon"][-1],  # Y4 of last word
    }


@app.route("/analyze", methods=["POST"])
def analyze():
    """Analyze the PDF behind ``file_id`` and store its layout values.

    Expects a JSON body ``{"file_id": ...}``. Looks up the file's ``filePath``
    in the Supabase ``files`` table, downloads it, runs the Azure layout
    analysis, and writes ``page_height``, ``page_width``, ``x1`` and ``y4``
    back to the same row.

    Returns:
        A ``(json, status)`` pair: 200 on success, 400 when ``file_id`` is
        missing, 404 when the file row does not exist, 422 when the document
        has no page or no words to measure, and 500 for any other failure.
    """
    try:
        # Get file ID from request. silent=True turns a missing or malformed
        # JSON body into None, so it is reported as a 400 rather than a 500.
        payload = request.get_json(silent=True)
        file_id = payload.get("file_id") if isinstance(payload, dict) else None
        log_debug("Received API request", file_id=file_id)
        if not file_id:
            return jsonify({"error": "File ID is required"}), 400

        # Fetch file path from Supabase. limit(1) is used instead of single()
        # so a missing row comes back as empty data rather than an exception,
        # and only .data is inspected because the query result has no
        # status_code attribute.
        file_data = (
            supabase_client.table("files")
            .select("filePath")
            .eq("id", file_id)
            .limit(1)
            .execute()
        )
        rows = file_data.data or []
        log_debug("Supabase file data response", data=rows)
        if not rows:
            return jsonify({"error": "File not found"}), 404
        file_path = rows[0]["filePath"]
        log_debug("File path retrieved from Supabase", file_path=file_path)

        # Download the file from Supabase
        file_content = download_file_from_supabase(file_path)

        # Analyze the PDF layout with Azure
        layout_data = analyze_pdf_layout(file_content)
        # Log a summary only; the full result can be very large.
        log_debug("Layout data retrieved", page_count=len(layout_data.get("pages") or []))

        # Extract required layout values
        try:
            layout_values = _extract_layout_values(layout_data)
        except ValueError as extraction_error:
            log_debug("Layout extraction failed", error=str(extraction_error))
            return jsonify({"error": str(extraction_error)}), 422
        log_debug("Extracted layout values", **layout_values)

        # Update the `files` table in Supabase
        update_response = (
            supabase_client.table("files")
            .update(layout_values)
            .eq("id", file_id)
            .execute()
        )
        log_debug("Supabase update response", data=update_response.data)
        # An update that touched no row returns empty data.
        if not update_response.data:
            return jsonify({"error": "Failed to update file layout data"}), 500

        return jsonify({"message": "Layout data successfully updated"}), 200
    except Exception as e:
        # Top-level boundary: record the traceback, then report the failure.
        logging.exception("Unhandled error in /analyze")
        log_debug("Error occurred", error=str(e))
        return jsonify({"error": str(e)}), 500
if __name__ == "__main__":
    # Executed as a script: start the Flask server on all interfaces, port 8000.
    app.run(port=8000, host="0.0.0.0")