"""Flask service that stores page-layout metrics for a PDF kept in Supabase.

``POST /analyze`` takes a JSON body ``{"file_id": ...}``, looks up the file's
``filePath`` in the Supabase ``files`` table, downloads the PDF from the
``files`` storage bucket, runs it through the Azure ``prebuilt-layout`` model
and writes ``page_height``, ``page_width``, ``x1`` and ``y4`` back to the
same ``files`` row.
"""

import json
import logging
import os
import time

import requests
import supabase
from flask import Flask, request, jsonify

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# NOTE(review): the credentials below were committed to source control. They
# can now be overridden through environment variables; the literals are kept
# only as a fallback so existing deployments keep working. Rotate both keys
# and delete the fallbacks once the environment is configured.

# Azure Document Intelligence setup
AZURE_ENDPOINT = os.environ.get(
    "AZURE_ENDPOINT", "https://gosignpdf.cognitiveservices.azure.com/"
)
AZURE_KEY = os.environ.get(
    "AZURE_KEY",
    "2nUifMPmbS35qkiFr5OjgzDw7ooE5Piw5892GQgyWZHe0oNRIBJHJQQJ99AKACfhMk5XJ3w3AAALACOGkANC",
)

# Supabase setup
SUPABASE_URL = os.environ.get(
    "SUPABASE_URL", "https://dtzuqtvroalrjhgdcowq.supabase.co/"
)
SUPABASE_KEY = os.environ.get(
    "SUPABASE_KEY",
    "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6ImR0enVxdHZyb2FscmpoZ2Rjb3dxIiwicm9sZSI6ImFub24iLCJpYXQiOjE3MjU0NDk3MzIsImV4cCI6MjA0MTAyNTczMn0.WrIvwEOq4CqCb8IkU8G4jiWkf9DM1JxGd2_aTN4vlV4",
)

# Upper bound (seconds) for every individual HTTP call made with `requests`,
# so a stalled connection cannot block a worker indefinitely.
REQUEST_TIMEOUT_SECONDS = 60

# Poll responses with these status codes are treated as transient and retried
# until the overall deadline expires.
_RETRYABLE_POLL_STATUSES = (429, 500, 502, 503, 504)

supabase_client = supabase.create_client(SUPABASE_URL, SUPABASE_KEY)
app = Flask(__name__)


def log_debug(message, **kwargs):
    """Log a tracking message followed by one ``- key: value`` line per kwarg.

    The record is emitted at INFO level through the module logger (the level
    configured above), keeping the ``[DEBUG]`` prefix of the original output.
    """
    lines = [f"[DEBUG] {message}"]
    lines.extend(f" - {key}: {value}" for key, value in kwargs.items())
    logger.info("\n".join(lines))


def _status_not_ok(response):
    """Return True when *response* exposes a ``status_code`` that is not 200.

    Result objects without a ``status_code`` attribute are treated as
    successful: in that case the client is expected to have raised on failure.
    NOTE(review): confirm against the pinned supabase-py version.
    """
    status_code = getattr(response, "status_code", None)
    return status_code is not None and status_code != 200


def download_file_from_supabase(file_path):
    """Download a file from the Supabase ``files`` storage bucket.

    Args:
        file_path: Object path inside the ``files`` bucket.

    Returns:
        The file content as bytes.

    Raises:
        Exception: If the client returns a response object whose status code
            is not 200.
    """
    log_debug("Downloading file from Supabase", file_path=file_path)
    result = supabase_client.storage.from_("files").download(file_path)

    # Current supabase-py storage clients return the raw bytes directly; the
    # response-object branch below is kept for clients that return an HTTP
    # response instead. NOTE(review): verify against the pinned version.
    if isinstance(result, (bytes, bytearray)):
        log_debug("Supabase download response", size=len(result))
        return bytes(result)

    status_code = getattr(result, "status_code", None)
    # The body is a binary PDF, so only the status is logged, never the text.
    log_debug("Supabase download response", status_code=status_code)
    if status_code != 200:
        detail = getattr(result, "text", result)
        raise Exception(f"Failed to download file from Supabase: {detail}")
    return result.content


def analyze_pdf_layout(file_content, poll_interval=8, max_wait=300):
    """Send a PDF to Azure's ``prebuilt-layout`` model and return the result.

    Args:
        file_content: Raw PDF bytes.
        poll_interval: Seconds to wait between polls of the operation URL.
        max_wait: Maximum total seconds to wait for the analysis to finish.

    Returns:
        The ``analyzeResult`` object of the finished operation.

    Raises:
        TimeoutError: If the analysis does not finish within ``max_wait``.
        Exception: If the submit call is not accepted, the
            ``Operation-Location`` header is missing, polling returns a
            non-retryable HTTP error, or Azure reports the analysis as failed.
    """
    log_debug("Sending PDF to Azure for analysis")
    # AZURE_ENDPOINT may end with "/"; strip it so the URL has no "//".
    url = (
        f"{AZURE_ENDPOINT.rstrip('/')}"
        "/formrecognizer/documentModels/prebuilt-layout:analyze"
        "?api-version=2023-07-31"
    )
    headers = {
        "Ocp-Apim-Subscription-Key": AZURE_KEY,
        "Content-Type": "application/pdf",
    }
    response = requests.post(
        url, headers=headers, data=file_content, timeout=REQUEST_TIMEOUT_SECONDS
    )
    log_debug(
        "Azure response",
        status_code=response.status_code,
        headers=response.headers,
    )
    if response.status_code != 202:
        raise Exception(f"Azure request failed: {response.text}")

    operation_location = response.headers.get("Operation-Location")
    log_debug("Azure operation location", operation_location=operation_location)
    if not operation_location:
        raise Exception("Operation-Location header not found in response.")

    poll_headers = {"Ocp-Apim-Subscription-Key": AZURE_KEY}
    deadline = time.monotonic() + max_wait
    while True:
        result_response = requests.get(
            operation_location,
            headers=poll_headers,
            timeout=REQUEST_TIMEOUT_SECONDS,
        )
        if result_response.ok:
            result = result_response.json()
            status = result.get("status")
            log_debug("Azure polling result", status=status)
            if status == "succeeded":
                log_debug("Azure analysis succeeded")
                return result["analyzeResult"]
            if status == "failed":
                log_debug("Azure analysis failed", error=result.get("error"))
                raise Exception("Analysis failed.")
        elif result_response.status_code not in _RETRYABLE_POLL_STATUSES:
            raise Exception(
                f"Azure polling failed ({result_response.status_code}): "
                f"{result_response.text}"
            )
        else:
            log_debug(
                "Azure polling returned a transient error",
                status_code=result_response.status_code,
            )

        # Give up instead of polling forever when Azure never reports a
        # terminal status.
        if time.monotonic() + poll_interval > deadline:
            raise TimeoutError(
                f"Azure analysis did not finish within {max_wait} seconds."
            )
        time.sleep(poll_interval)


def _extract_layout_values(layout_data):
    """Return the column values stored for a file from an Azure layout result.

    Only the first page is inspected (single-page PDFs are assumed). ``x1`` is
    the first polygon coordinate of the first word and ``y4`` is the last
    polygon coordinate of the last word.

    Raises:
        ValueError: If the result has no pages, the first page has no words,
            a needed polygon is empty, or the page dimensions are missing.
    """
    pages = layout_data.get("pages") or []
    if not pages:
        raise ValueError("No pages found in the analysis result")

    page = pages[0]
    words = page.get("words") or []
    if not words:
        raise ValueError("No words found on the first page")

    first_polygon = words[0].get("polygon") or []
    last_polygon = words[-1].get("polygon") or []
    if not first_polygon or not last_polygon:
        raise ValueError("Word polygon data is missing from the analysis result")

    page_height = page.get("height")
    page_width = page.get("width")
    if page_height is None or page_width is None:
        raise ValueError("Page dimensions are missing from the analysis result")

    return {
        "page_height": page_height,
        "page_width": page_width,
        "x1": first_polygon[0],  # X1 of first word
        "y4": last_polygon[-1],  # Y4 of last word
    }


@app.route("/analyze", methods=["POST"])
def analyze():
    """Analyze the PDF identified by ``file_id`` and store its layout values.

    Expects a JSON object body containing ``file_id``.

    Returns:
        A ``(json, status)`` pair: 200 on success, 400 when ``file_id`` is
        missing or the body is not a JSON object, 404 when no matching file
        row exists, 422 when the analysis result lacks the needed layout
        data, and 500 on any other failure.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so bad input produces a 400 rather than a 500.
    payload = request.get_json(silent=True)
    file_id = payload.get("file_id") if isinstance(payload, dict) else None
    log_debug("Received API request", file_id=file_id)
    if not file_id:
        return jsonify({"error": "File ID is required"}), 400

    try:
        # Fetch file path from Supabase. limit(1) is used instead of single()
        # so that "no such row" comes back as an empty list rather than an
        # exception.
        file_data = (
            supabase_client.table("files")
            .select("filePath")
            .eq("id", file_id)
            .limit(1)
            .execute()
        )
        rows = file_data.data or []
        log_debug("Supabase file data response", data=rows)
        if _status_not_ok(file_data) or not rows:
            return jsonify({"error": "File not found"}), 404

        # Accept a bare row object as well as a list of rows.
        row = rows[0] if isinstance(rows, list) else rows
        file_path = row["filePath"]
        log_debug("File path retrieved from Supabase", file_path=file_path)

        # Download the file from Supabase
        file_content = download_file_from_supabase(file_path)

        # Analyze the PDF layout with Azure
        layout_data = analyze_pdf_layout(file_content)
        log_debug(
            "Layout data retrieved",
            page_count=len(layout_data.get("pages") or []),
        )

        # Extract required layout values
        try:
            layout_values = _extract_layout_values(layout_data)
        except ValueError as exc:
            log_debug("Layout data incomplete", error=str(exc))
            return jsonify({"error": str(exc)}), 422
        log_debug("Extracted layout values", **layout_values)

        # Update the `files` table in Supabase
        update_response = (
            supabase_client.table("files")
            .update(layout_values)
            .eq("id", file_id)
            .execute()
        )
        log_debug("Supabase update response", data=update_response.data)
        if _status_not_ok(update_response):
            return jsonify({"error": "Failed to update file layout data"}), 500

        return jsonify({"message": "Layout data successfully updated"}), 200
    except Exception:
        # Keep the details in the server log; do not expose internal error
        # text (URLs, upstream responses) to the client.
        logger.exception("Error occurred while analyzing file %s", file_id)
        return jsonify({"error": "Internal server error"}), 500


if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8000)