Spaces:

janmayjay
/

Sonofica

Running

App Files Files Community

Sonofica / utils /json_to_coco.py

janmayjay

Add application file

39a7537 about 1 month ago

raw

history blame

5.34 kB

	import json
	import os

	def _as_list(value):
	    """Return *value* in a form that can be iterated as a list of annotations."""
	    if value is None:
	        return []
	    if isinstance(value, dict):
	        # NOTE(review): assumes a category holding a single annotation may be
	        # stored as a bare dict instead of a one-element list (typical of
	        # XML-to-dict converters) -- confirm against the JSON producer.
	        return [value]
	    return value


	def _assign_splits(page_ids):
	    """Map each page id to "test", "val" or "train".

	    Ids are ranked from highest to lowest; the first ceil(n / 10) go to
	    "test", the next ceil(n / 10) to "val", and the remainder to "train".
	    """
	    ordered = sorted(set(page_ids), reverse=True)
	    fold = (len(ordered) + 9) // 10  # integer ceil(len / 10)
	    assignment = {}
	    for rank, page_id in enumerate(ordered):
	        if rank < fold:
	            assignment[page_id] = "test"
	        elif rank < 2 * fold:
	            assignment[page_id] = "val"
	        else:
	            assignment[page_id] = "train"
	    return assignment


	def _make_annotation(ann, category_id, image_id, ann_id):
	    """Build one COCO bbox annotation from an ``@xmin/@ymin/@xmax/@ymax`` record."""
	    x_min = int(ann["@xmin"])
	    y_min = int(ann["@ymin"])
	    width = int(ann["@xmax"]) - x_min
	    height = int(ann["@ymax"]) - y_min
	    return {
	        "id": ann_id,
	        "image_id": image_id,
	        "category_id": category_id,
	        "bbox": [x_min, y_min, width, height],  # COCO layout: x, y, w, h
	        "area": width * height,
	        "iscrowd": 0,
	    }


	def json_to_coco(config):
	    """Convert per-book page annotations (JSON) into COCO train/val/test sets.

	    Every ``*.json`` file in ``<root>/json`` is read; its ``"page"`` entries
	    (except page indices 0 and 1, which are skipped) are assigned to a split,
	    the page image ``<manga109_root_dir>/images/<book>/<NNN>.jpg`` is copied to
	    ``<root_images>/<split>/<book>_<NNN>.jpg`` and the boxes of the categories
	    body/face/frame/text are appended to that split's COCO structure.

	    Written files:
	        * ``<root>/temp_uniq_file_name_mapping.json`` -- image id -> file name
	        * ``<root_annotations>/instances_{train,val,test}.json``

	    Args:
	        config: mapping with the keys ``"root"``, ``"root_images"``,
	            ``"root_annotations"`` and ``"manga109_root_dir"``.

	    Returns:
	        None. All results are written to disk.

	    Raises:
	        KeyError: a config key or a required page/annotation field is missing.
	        OSError: an input JSON or a source image cannot be read, or an output
	            cannot be written.
	    """
	    import copy
	    import shutil

	    base_dir = config["root"]
	    image_dir = config["root_images"]
	    annotations_dir = config["root_annotations"]
	    # os.path.join tolerates directories given with or without a trailing
	    # separator, unlike plain string concatenation.
	    json_dir = os.path.join(base_dir, "json")

	    category_names = ["body", "face", "frame", "text"]
	    category_map = {name: idx for idx, name in enumerate(category_names, start=1)}
	    categories = [
	        {"id": cid, "name": name, "supercategory": "object"}
	        for name, cid in category_map.items()
	    ]
	    # One independent COCO structure per split, written in this order.
	    splits = {
	        split_name: {
	            "images": [],
	            "annotations": [],
	            "categories": copy.deepcopy(categories),
	        }
	        for split_name in ("train", "val", "test")
	    }

	    # Create the output folders up front so the copies/dumps below cannot fail
	    # merely because a directory is missing.
	    for split_name in splits:
	        os.makedirs(os.path.join(image_dir, split_name), exist_ok=True)
	    if annotations_dir:
	        os.makedirs(annotations_dir, exist_ok=True)

	    uniq_filename_mapping = {}
	    uniq = 0  # image id; advances for every page, including skipped ones
	    next_ann_id = 1  # single running counter keeps annotation ids unique

	    # Sorted so image/annotation ids are reproducible between runs
	    # (os.listdir order is arbitrary).
	    for entry in sorted(os.listdir(json_dir)):
	        book, ext = os.path.splitext(entry)
	        if ext.lower() != ".json":
	            continue
	        with open(os.path.join(json_dir, entry), "r", encoding="utf-8") as file:
	            pages = json.load(file)["page"]

	        # Split only the pages that are actually exported, so the last page of
	        # a book is no longer left without a split.
	        usable_ids = [
	            page_id
	            for page_id in (int(page["@index"]) for page in pages)
	            if page_id not in (0, 1)
	        ]
	        split_of = _assign_splits(usable_ids)
	        book_image_dir = os.path.join(config["manga109_root_dir"], "images", book)

	        for page in pages:
	            uniq += 1
	            img_id = int(page["@index"])
	            if img_id in (0, 1):
	                continue

	            page_file = f"{str(img_id).zfill(3)}.jpg"
	            # Built from its parts instead of splitting the source path on a
	            # backslash, which only yielded the file name on Windows.
	            out_name = f"{book}_{page_file}"
	            uniq_filename_mapping[uniq] = out_name

	            split_name = split_of[img_id]
	            coco = splits[split_name]
	            coco["images"].append({
	                "id": uniq,
	                "width": int(page["@width"]),
	                "height": int(page["@height"]),
	                "file_name": out_name,
	            })
	            for category in category_names:
	                for ann in _as_list(page.get(category)):
	                    coco["annotations"].append(
	                        _make_annotation(ann, category_map[category], uniq, next_ann_id)
	                    )
	                    next_ann_id += 1

	            shutil.copyfile(
	                os.path.join(book_image_dir, page_file),
	                os.path.join(image_dir, split_name, out_name),
	            )

	    mapping_path = os.path.join(base_dir, "temp_uniq_file_name_mapping.json")
	    with open(mapping_path, "w", encoding="utf-8") as f:
	        json.dump(uniq_filename_mapping, f, ensure_ascii=False, indent=2)
	    for split_name, coco in splits.items():
	        out_path = os.path.join(annotations_dir, f"instances_{split_name}.json")
	        with open(out_path, "w", encoding="utf-8") as f:
	            json.dump(coco, f, ensure_ascii=False, indent=2)