# Sonofica / utils / json_to_coco.py
# Hugging Face file-view header: janmayjay, commit 39a7537 ("Add application file"), 5.34 kB.
import json
import os
# Annotation categories read from every page; COCO category ids are 1-based
# positions in this tuple.
_CATEGORY_NAMES = ("body", "face", "frame", "text")

# Dataset splits; each one gets an image sub-directory and an instances file.
_SPLITS = ("train", "val", "test")


def _split_page_indices(page_total):
    """Map page indices of one book to "test", "val" or "train".

    Walking the indices from high to low, the first ceil(n / 10) go to
    "test", the next ceil(n / 10) to "val" and the remainder to "train",
    where n = page_total - 2.

    Returns a dict {page_index: split_name}; indices missing from the dict
    belong to no split.
    """
    usable = page_total - 2
    fold = -(-usable // 10)  # ceil(usable / 10) without going through floats
    split_of = {}
    # NOTE(review): this covers indices usable..1, but the caller skips pages
    # 0 and 1, so index 1's slot is never used and the last page
    # (index page_total - 1) never lands in any split. Kept as in the original
    # split; confirm whether range(usable + 1, 1, -1) was intended.
    for position, index in enumerate(range(usable, 0, -1)):
        if position < fold:
            split_of[index] = "test"
        elif position < 2 * fold:
            split_of[index] = "val"
        else:
            split_of[index] = "train"
    return split_of


def _as_list(value):
    """Return *value* as a list: None -> [], a lone dict -> [dict]."""
    if value is None:
        return []
    if isinstance(value, dict):
        return [value]
    return value


def _bbox_annotation(box, category_id, image_id, ann_id):
    """Build one COCO annotation from a box with @xmin/@ymin/@xmax/@ymax."""
    x_min = int(box["@xmin"])
    y_min = int(box["@ymin"])
    width = int(box["@xmax"]) - x_min
    height = int(box["@ymax"]) - y_min
    return {
        "id": ann_id,
        "image_id": image_id,
        "category_id": category_id,
        "bbox": [x_min, y_min, width, height],
        "area": width * height,
        "iscrowd": 0,
    }


def _write_json(payload, path):
    """Dump *payload* to *path* as indented UTF-8 JSON."""
    with open(path, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, ensure_ascii=False, indent=2)


def json_to_coco(config):
    """Convert per-book annotation JSON files into COCO train/val/test sets.

    Keys read from *config*:
        "root": directory holding a "json" sub-directory with one
            "<book>.json" per book; the id-to-filename mapping
            "temp_uniq_file_name_mapping.json" is also written here.
        "root_images": destination directory; page images are copied into
            its "train", "val" and "test" sub-directories as
            "<book>_<NNN>.jpg".
        "root_annotations": destination directory for
            "instances_train.json", "instances_val.json" and
            "instances_test.json".
        "manga109_root_dir": source directory; page images are read from
            "images/<book>/<NNN>.jpg" below it.

    Each book JSON must have a "page" list whose items carry "@index",
    "@width", "@height" and, optionally, box lists (or a single box dict)
    under "body", "face", "frame" and "text".

    Pages with index 0 and 1 are skipped. Image ids are a running counter
    over every page of every book (skipped pages still consume an id), with
    books visited in sorted filename order so ids are reproducible.
    Annotation ids come from one counter shared by all splits, so they are
    unique within every output file.

    Returns None. Raises KeyError for missing config/page keys and OSError
    when a source file cannot be read or copied.
    """
    import itertools
    import shutil

    base_dir = config["root"]
    image_dir = config["root_images"]
    annotations_dir = config["root_annotations"]
    json_dir = os.path.join(base_dir, "json")

    category_ids = {
        name: number for number, name in enumerate(_CATEGORY_NAMES, start=1)
    }
    datasets = {
        split: {
            "images": [],
            "annotations": [],
            "categories": [
                {"id": cid, "name": name, "supercategory": "object"}
                for name, cid in category_ids.items()
            ],
        }
        for split in _SPLITS
    }

    # Make sure every destination exists before the first copy/write.
    for split in _SPLITS:
        os.makedirs(os.path.join(image_dir, split), exist_ok=True)
    if annotations_dir:
        os.makedirs(annotations_dir, exist_ok=True)

    uniq_filename_mapping = {}
    ann_ids = itertools.count(1)
    uniq = 0

    for entry in sorted(os.listdir(json_dir)):
        book, extension = os.path.splitext(entry)
        if extension.lower() != ".json":
            continue
        with open(os.path.join(json_dir, entry), "r", encoding="utf-8") as file:
            pages = json.load(file)["page"]

        split_of = _split_page_indices(len(pages))
        book_images = os.path.join(config["manga109_root_dir"], "images", book)

        for page in pages:
            uniq += 1
            img_id = int(page["@index"])
            if img_id in (0, 1):
                continue

            page_file = f"{str(img_id).zfill(3)}.jpg"
            file_name = f"{book}_{page_file}"
            uniq_filename_mapping[uniq] = file_name

            split = split_of.get(img_id)
            if split is None:
                continue

            dataset = datasets[split]
            dataset["images"].append({
                "id": uniq,
                # Stored as integers, like every other "@" attribute read here.
                "width": int(page["@width"]),
                "height": int(page["@height"]),
                "file_name": file_name,
            })
            for category in _CATEGORY_NAMES:
                for box in _as_list(page.get(category)):
                    dataset["annotations"].append(
                        _bbox_annotation(
                            box, category_ids[category], uniq, next(ann_ids)
                        )
                    )
            shutil.copyfile(
                os.path.join(book_images, page_file),
                os.path.join(image_dir, split, file_name),
            )

    _write_json(
        uniq_filename_mapping,
        os.path.join(base_dir, "temp_uniq_file_name_mapping.json"),
    )
    for split in _SPLITS:
        _write_json(
            datasets[split],
            os.path.join(annotations_dir, f"instances_{split}.json"),
        )