Spaces:

sharoz
/

Exam_Checker_AI

Running

App Files Files Community

Exam_Checker_AI / transcriber.py

sharoz

innit

c924c3e 4 months ago

raw

history blame contribute delete

2.63 kB

	import os
	import base64
	from openai import OpenAI
	from dotenv import load_dotenv
	from helper import encode_image, pdf_to_images
	# from pdf_processor import pdf_to_images
	from tqdm import tqdm
	import json

	load_dotenv()


	def transcribe_image(image_path, handwritten_flag=True):
	    """Transcribe the text in one image with the OpenAI ``gpt-4o`` chat model.

	    The image is base64-encoded via ``encode_image`` and sent as a data URL
	    together with a system prompt that forbids correcting mistakes found in
	    the image.

	    Args:
	        image_path: Location of the image; passed unchanged to
	            ``encode_image``.
	        handwritten_flag: When truthy, use the prompt that mentions
	            handwritten text and asks for raw text output; otherwise use the
	            generic-text prompt.

	    Returns:
	        The ``content`` of the first choice's message in the API response.
	    """
	    # Encode first, then build the client from the OPENAI_API_KEY env var.
	    base64_image = encode_image(image_path)
	    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

	    # Instruction shared by both prompt variants (typos are part of the
	    # runtime prompt and are intentionally left untouched).
	    no_corrections = (
	        "\n\nYou are not allowed to correct any mistakes in the imput, "
	        "the output text should be exactly the same as in the image input."
	    )
	    if handwritten_flag:
	        system_cmd = (
	            "You are a professional transcriber, you will be given an input "
	            "image which has handwritten text and your job is to transcribe "
	            "it to the best of your ability."
	            + no_corrections
	            + " \nJust output raw text."
	        )
	    else:
	        system_cmd = (
	            "You are a professional transcriber, you will be given an input "
	            "image which has text and your job is to transcribe "
	            "it to the best of your ability."
	            + no_corrections
	        )

	    system_message = {
	        "role": "system",
	        "content": [{"type": "text", "text": system_cmd}],
	    }
	    # NOTE(review): the data URL always declares image/jpeg, whatever the
	    # extension of image_path is -- confirm this matches what encode_image
	    # actually produces.
	    user_message = {
	        "role": "user",
	        "content": [
	            {
	                "type": "text",
	                "text": "Transcribe the following image to text in markdown format.",
	            },
	            {
	                "type": "image_url",
	                "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"},
	            },
	        ],
	    }

	    completion = client.chat.completions.create(
	        model="gpt-4o",
	        messages=[system_message, user_message],
	    )
	    return completion.choices[0].message.content



	def transcribe_pdf(pdf_path, interim_files_folder_path, save_dict=False,
	                   save_dict_path=None, handwritten_flag=True):
	    """Transcribe every page of a PDF and return the text keyed by page.

	    The PDF is first turned into page images with ``pdf_to_images`` (called
	    with ``zoom_x=2.0`` and ``zoom_y=2.0``); each image is then passed to
	    ``transcribe_image``.

	    Args:
	        pdf_path: Location of the PDF; forwarded to ``pdf_to_images``.
	        interim_files_folder_path: Forwarded to ``pdf_to_images``
	            (presumably where the page images are written -- confirm
	            against the helper).
	        save_dict: Whether to also write the result to disk as JSON.
	        save_dict_path: Destination of the JSON file. Saving only happens
	            when both ``save_dict`` and ``save_dict_path`` are truthy; if
	            the path is missing the save step is silently skipped.
	        handwritten_flag: Forwarded to ``transcribe_image`` so callers can
	            pick the printed-text prompt for typed documents. Defaults to
	            ``True``, which matches the previous hard-wired behaviour.

	    Returns:
	        A dict mapping ``"page_1"``, ``"page_2"``, ... (in the order the
	        images are returned by ``pdf_to_images``) to each transcription.
	    """
	    image_paths = pdf_to_images(
	        pdf_path, interim_files_folder_path, zoom_x=2.0, zoom_y=2.0
	    )

	    transcriptions = {}
	    # Page keys are 1-based, so let enumerate start counting at 1.
	    pages = enumerate(tqdm(image_paths, desc="Transcribing PDF"), start=1)
	    for page_number, image_path in pages:
	        transcriptions[f"page_{page_number}"] = transcribe_image(
	            image_path, handwritten_flag=handwritten_flag
	        )

	    if save_dict and save_dict_path:
	        # json.dump escapes non-ASCII by default, so the bytes written are
	        # unchanged; the explicit encoding just removes the dependence on
	        # the platform's locale.
	        with open(save_dict_path, "w", encoding="utf-8") as f:
	            json.dump(transcriptions, f)

	    return transcriptions