|
"""
Convert a chat screenshot to structured text using OpenAI's GPT-4o model.

This module provides a function to process chat screenshots and convert them
into structured text format using OpenAI's vision capabilities.
"""
|
import base64
import mimetypes
import os

from dotenv import load_dotenv
from openai import OpenAI
|
|
|
|
|
load_dotenv() |
|
|
|
def encode_image(image_path):
    """
    Read a file from disk and return its contents as base64 text.

    Args:
        image_path (str): Location of the image file to read.

    Returns:
        str: The file's bytes, base64-encoded and decoded as UTF-8 text.
    """
    # Binary mode: the bytes are encoded as-is, with no text decoding step.
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()

    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")
|
|
|
# MIME type used when the real type of the image cannot be determined.
_DEFAULT_IMAGE_MIME = "image/jpeg"

# System prompt (Traditional Chinese). It tells the model that messages on the
# left come from the other party and messages on the right from the user, and
# asks for the conversation oldest-to-newest as XML: <usr> for the other
# party, <self> for the user, all wrapped in <chat></chat>. Non-text content
# (images, audio) is to be ignored and an empty <chat></chat> returned when
# nothing can be parsed.
# NOTE(review): the inline example only demonstrates <usr> (never <self>), and
# "忠時" looks like a typo for "忠實". The text is deliberately kept
# byte-identical here because any change alters model behaviour -- confirm
# with the prompt owner before editing.
_CHAT_EXTRACTION_PROMPT = (
    "你是一位善於將對話的聊天紀錄的截圖,還原成文本結構的聊天資訊的專家。"
    "對於收到的每一張截圖,請仔細閱讀他們的聊天紀錄,左側的訊息表示其他人傳的訊息、"
    "右邊則是用戶傳的訊息,將聊天紀錄的截圖轉換成文本格式的聊天。"
    "請依照訊息的時間順序,由舊到新的順序,使用XML的格式來輸出聊天紀錄,"
    "<usr>表示使用者的聊天對象的訊息、<self>表示使用者傳的訊息,"
    "最多只能有chat、usr這種二級結構,"
    "例如<chat><usr>你好你好</usr><usr>哈囉哈囉</usr></chat>\n\n"
    "務必要確保忠時的還原使用者的對話紀錄,並且忽略聊天室中像是圖片、音訊等非文字的資訊。"
    "並使用<chat></chat>為標記,包裹整段的聊天資訊。如果你解析不出任何訊息,輸出<chat></chat>就好。"
)


def _build_image_url(image_input):
    """Turn a file path, URL, data URL, or raw base64 string into an image URL."""
    if os.path.isfile(image_input):
        # Label the payload with the file's real type (e.g. image/png) instead
        # of always claiming JPEG; fall back to JPEG when the extension is
        # unknown or does not map to an image type.
        guessed_type, _ = mimetypes.guess_type(image_input)
        if guessed_type and guessed_type.startswith("image/"):
            mime_type = guessed_type
        else:
            mime_type = _DEFAULT_IMAGE_MIME
        return f"data:{mime_type};base64,{encode_image(image_input)}"

    # URI schemes are case-insensitive, so compare a lower-cased prefix.
    # Requiring "://" keeps a raw base64 payload that merely begins with the
    # letters "http" from being mistaken for a URL.
    prefix = image_input[:10].lower()
    if prefix.startswith(("data:image", "http://", "https://")):
        return image_input

    # Anything else is treated as a bare base64 payload of unknown type.
    return f"data:{_DEFAULT_IMAGE_MIME};base64,{image_input}"


def img2chat(image_input, *, client=None, model="gpt-4o", max_tokens=913):
    """
    Convert a chat screenshot to structured text using an OpenAI vision model.

    The image is sent together with a fixed system prompt that asks the model
    to transcribe the conversation as XML wrapped in ``<chat></chat>``.

    Args:
        image_input (str): One of: a path to an existing image file, an
            ``http://`` / ``https://`` URL, a ``data:image...`` URL, or a bare
            base64-encoded image. Bare base64 is labelled as JPEG.
        client: Optional pre-built client exposing
            ``chat.completions.create``. When omitted, a new ``OpenAI`` client
            is created from the ``OPENAI_API_KEY`` environment variable.
            Passing one in lets callers reuse a single client across calls.
        model (str): Name of the model to query. Defaults to ``"gpt-4o"``.
        max_tokens (int): Upper bound on generated tokens. Defaults to 913.

    Returns:
        The ``content`` of the first choice's message, as returned by the API
        (normally the structured chat text).

    Raises:
        Whatever the client raises for authentication, network or request
        errors, and any ``OSError`` from reading the image file.
    """
    if client is None:
        client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    image_url = _build_image_url(image_input)

    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": _CHAT_EXTRACTION_PROMPT,
            },
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "請分析這張聊天截圖並轉換成文本格式:"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": image_url,
                        },
                    },
                ],
            },
        ],
        max_tokens=max_tokens,
    )

    return response.choices[0].message.content
|
|
|
|
|
if __name__ == "__main__":
    # Manual smoke test: announce the demo, then transcribe a sample
    # screenshot given by file path and print whatever the model returns.
    sample_path = "tst_img/tst1.png"

    print("\nExample with file path:")
    transcript = img2chat(sample_path)
    print(transcript)