import os
import base64
from openai import OpenAI
from dotenv import load_dotenv
from helper import encode_image, pdf_to_images
# from pdf_processor import pdf_to_images
from tqdm import tqdm
import json

load_dotenv()


def transcribe_image(image_path, handwritten_flag=True, client=None):
    """Transcribe the text in a single image with the OpenAI chat API.

    Args:
        image_path: Path of the image; handed to ``encode_image`` to obtain
            the Base64 string embedded in the request.
        handwritten_flag: When true, use the system prompt written for
            handwritten text (which also asks for raw text output);
            otherwise use the generic text prompt.
        client: Optional ``OpenAI`` client to reuse across calls. When
            omitted, a new client is built from the ``OPENAI_API_KEY``
            environment variable, exactly as before.

    Returns:
        The message content of the first choice in the completion response.
    """
    # Getting the Base64 string
    base64_image = encode_image(image_path)

    if client is None:
        client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    # The prompt text below is sent to the model verbatim; it is kept
    # byte-for-byte (including spelling) so model behavior does not shift.
    if handwritten_flag:
        system_cmd = (
            "You are a professional transcriber, you will be given an input "
            "image which has handwritten text and your job is to transcribe "
            "it to the best of your ability.\n\n"
            "You are not allowed to correct any mistakes in the imput, the "
            "output text should be exactly the same as in the image input. \n"
            "Just output raw text."
        )
    else:
        system_cmd = (
            "You are a professional transcriber, you will be given an input "
            "image which has text and your job is to transcribe it to the "
            "best of your ability.\n\n"
            "You are not allowed to correct any mistakes in the imput, the "
            "output text should be exactly the same as in the image input."
        )

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": system_cmd,
                    }
                ],
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        # "text": "tables should be in markdown format without any padding, encapsulation or ``` ```.",
                        "text": "Transcribe the following image to text in markdown format.",
                    },
                    {
                        "type": "image_url",
                        # NOTE(review): the data URL is always labelled
                        # image/jpeg regardless of the file's real format;
                        # confirm what pdf_to_images / callers produce.
                        "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
                    },
                ],
            },
        ],
    )
    return response.choices[0].message.content


def transcribe_pdf(pdf_path, interim_files_folder_path, save_dict=False,
                   save_dict_path=None, handwritten_flag=True):
    """Transcribe every image produced from a PDF and collect the results.

    Args:
        pdf_path: Path of the PDF; forwarded to ``pdf_to_images``.
        interim_files_folder_path: Folder forwarded to ``pdf_to_images``
            (presumably where the page images are written; verify against
            the helper).
        save_dict: Whether to also dump the result dictionary as JSON.
        save_dict_path: Destination of the JSON dump. Nothing is written
            unless both ``save_dict`` and this path are truthy.
        handwritten_flag: Forwarded to ``transcribe_image`` for every page.
            Defaults to ``True``, which matches the previous behavior.

    Returns:
        A dict mapping ``"page_1"``, ``"page_2"``, ... (in the order the
        image paths are returned) to the transcribed text of each image.
    """
    image_paths = pdf_to_images(pdf_path, interim_files_folder_path, zoom_x=2.0, zoom_y=2.0)

    # A single client is shared by all pages instead of building one per
    # page. It is created on the first page so that a PDF yielding no images
    # still never touches the API configuration.
    client = None
    transcriptions = {}
    for page_number, image_path in enumerate(
        tqdm(image_paths, desc="Transcribing PDF"), start=1
    ):
        if client is None:
            client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        transcriptions[f"page_{page_number}"] = transcribe_image(
            image_path, handwritten_flag=handwritten_flag, client=client
        )

    if save_dict and save_dict_path:
        # Explicit encoding keeps the output independent of the platform's
        # default locale.
        with open(save_dict_path, 'w', encoding="utf-8") as f:
            json.dump(transcriptions, f)

    return transcriptions