Spaces:
Running
Running
import base64 | |
import requests | |
import os | |
import shutil | |
from discord_webhook import DiscordWebhook | |
from dotenv import load_dotenv | |
load_dotenv() # take environment variables from .env. | |
def empty_directory(directory):
    """Delete everything inside ``directory`` while keeping the directory itself.

    Regular files and symbolic links are unlinked; real sub-directories are
    removed recursively. Nothing happens when ``directory`` does not exist.

    Args:
        directory: Path of the directory to empty.

    Raises:
        OSError: If ``directory`` exists but cannot be listed (for example it
            is a regular file) or an entry cannot be removed.
    """
    if not os.path.exists(directory):
        return

    for item in os.listdir(directory):
        item_path = os.path.join(directory, item)
        # Test for a link first: isfile()/isdir() follow symlinks, so a link to
        # a directory would reach shutil.rmtree (which refuses symlinks and
        # raises OSError) and a dangling link would match neither check and be
        # left behind. Unlinking removes only the link, never its target.
        if os.path.islink(item_path) or os.path.isfile(item_path):
            os.remove(item_path)
        elif os.path.isdir(item_path):
            shutil.rmtree(item_path)
def save_uploaded_file(directory, file):
    """Write an uploaded file object into ``directory`` and return its path.

    Args:
        directory: Destination directory; created (including parents) when it
            does not exist yet.
        file: Upload object exposing a ``name`` attribute and a
            ``getbuffer()`` method returning the file's bytes
            (presumably a Streamlit ``UploadedFile`` -- confirm against caller).

    Returns:
        The path of the file that was written.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(directory, exist_ok=True)

    # The name is supplied by the uploader: keep only its final component so
    # values such as "../x" or "/abs/x" cannot place the file outside
    # ``directory``.
    safe_name = os.path.basename(file.name)
    file_path = os.path.join(directory, safe_name)

    with open(file_path, "wb") as f:
        f.write(file.getbuffer())
    return file_path
def make_discord_trace_multimodal(image_path, text_message):
    """Post ``text_message`` to the Discord webhook with one image attached.

    The webhook URL is read from the ``DISCORD_HOOK`` environment variable, and
    the file at ``image_path`` is uploaded under the fixed name ``input.jpg``.

    Args:
        image_path: Path of the image file to attach.
        text_message: Text used as the message content.
    """
    hook_url = os.environ["DISCORD_HOOK"]
    webhook = DiscordWebhook(
        url=hook_url,
        username="invoice parsing using GPT_Vision",
        content=text_message,
    )

    # Read the image fully, then attach it as the single file of the message.
    with open(image_path, "rb") as image_file:
        image_bytes = image_file.read()
    webhook.add_file(file=image_bytes, filename="input.jpg")

    webhook.execute()
def pass_to_openai_vision_api(image, timeout=120):
    """Send an invoice image to the OpenAI chat completions API.

    The image bytes are base64-encoded into a ``data:image/jpeg`` URL and sent
    together with a fixed extraction prompt. The API key is read from the
    ``OPENAI_API_KEY`` environment variable.

    Args:
        image: Binary file-like object; its ``read()`` result is the image.
        timeout: Seconds passed to ``requests.post`` as its ``timeout``.
            Without a timeout the call could block indefinitely.

    Returns:
        The ``content`` of the first choice's message in the API response.

    Raises:
        KeyError: If ``OPENAI_API_KEY`` is not set, or if the response body
            lacks the expected ``choices``/``message``/``content`` keys (for
            example an API error payload); the message then includes the HTTP
            status and the body.
        requests.exceptions.RequestException: On connection failure or timeout.
    """
    api_key = os.environ["OPENAI_API_KEY"]

    # The API takes the image inline as a base64 data URL.
    base64_image = base64.b64encode(image.read()).decode('utf-8')

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    # NOTE(review): the prompt says "Above is the text extracted from an
    # invoice", but the request sends an image placed after this text --
    # confirm the wording is intended. Text kept verbatim.
    gpt_prompt = '''Above is the text extracted from an invoice.
You are an assistant tasked with extracting information from the invoice. Do this step by step.
1. First extract the date and due date.
2. Then assign it a category (e.g Food).
3. Extract the invoice number and vendor account number.
4. Extract the total amount.
5. Extract the items along with their name, quantity and individual price.
Output should only contain a dictionary in the following format
{
"Date": None,
"Due Date": None,
"Category": None,
"Invoice Number": None,
"Vendor Account Number": None,
"Total Amount": None,
"Items": [
{
"Item": None,
"Quantity": None,
"Individual Price": None
}
]
}
If a key is not mentioned in invoice or you dont understand, then make its value None
'''

    payload = {
        # NOTE(review): this is a preview model name -- confirm it is still
        # served by the API.
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": gpt_prompt
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}"
                        }
                    }
                ]
            }
        ],
        # Caps the length of the reply; value still to be investigated.
        "max_tokens": 300
    }

    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=timeout,
    )
    body = response.json()

    try:
        return body['choices'][0]['message']['content']
    except KeyError as err:
        # Same exception type as before, but with the status and body so an
        # API error is diagnosable instead of a bare KeyError('choices').
        raise KeyError(
            f"OpenAI response has no completion "
            f"(HTTP {response.status_code}): {body!r}"
        ) from err