import copy
import json
import math
import os
import shutil

# Annotation categories read from every page; COCO category ids are the
# 1-based position in this tuple.
_CATEGORY_NAMES = ("body", "face", "frame", "text")

# Dataset splits, also used as image sub-directory names and as the suffix of
# the ``instances_<split>.json`` annotation files.
_SPLITS = ("train", "val", "test")


def _as_list(value):
    """Return *value* as a list: ``None`` -> ``[]``, a lone dict -> ``[dict]``.

    The "@"-prefixed keys suggest the JSON came from an XML-to-dict export
    (presumably xmltodict -- TODO confirm), where an element with a single
    child is stored as a dict rather than a one-item list.
    """
    if value is None:
        return []
    if isinstance(value, dict):
        return [value]
    return value


def _split_page_indices(page_count):
    """Map page index -> split name for the indices ``page_count .. 1``.

    Walking the indices from highest to lowest, the first
    ``ceil(page_count / 10)`` go to "test", the next
    ``ceil(page_count / 10)`` to "val", and everything else to "train".

    NOTE(review): the caller skips pages 0 and 1 and passes
    ``len(pages) - 2``. If page indices are contiguous from 0, the highest
    index (``page_count + 1``) is therefore never assigned to any split and
    is left out of all three datasets. The original arithmetic is kept
    unchanged here -- confirm whether that is intended.
    """
    holdout = int(math.ceil(page_count / 10))
    split_of = {}
    for position, index in enumerate(range(page_count, 0, -1)):
        if position < holdout:
            split_of[index] = "test"
        elif position < 2 * holdout:
            split_of[index] = "val"
        else:
            split_of[index] = "train"
    return split_of


def _make_annotation(ann, category_id, image_id, ann_id):
    """Build one COCO annotation dict from an ``@xmin/@ymin/@xmax/@ymax`` box."""
    x_min = int(ann["@xmin"])
    y_min = int(ann["@ymin"])
    width = int(ann["@xmax"]) - x_min
    height = int(ann["@ymax"]) - y_min
    return {
        "id": ann_id,
        "image_id": image_id,
        "category_id": category_id,
        # COCO boxes are [x, y, width, height], not corner pairs.
        "bbox": [x_min, y_min, width, height],
        "area": width * height,
        "iscrowd": 0,
    }


def json_to_coco(config):
    """Convert per-book JSON annotations into COCO train/val/test datasets.

    Reads every ``*.json`` file in ``<root>/json``; the file stem is used as
    the book name. Pages with ``@index`` 0 or 1 are skipped. Each remaining
    page that falls into a split is added to that split's COCO structure and
    its image is copied from
    ``<manga109_root_dir>/images/<book>/<index:03d>.jpg`` to
    ``<root_images>/<split>/<book>_<index:03d>.jpg``.

    Args:
        config: Mapping with the keys
            ``"root"`` (directory holding ``json/``; the id-to-filename
            mapping is written here),
            ``"root_images"`` (output directory for the copied images),
            ``"root_annotations"`` (output directory for the COCO files) and
            ``"manga109_root_dir"`` (directory holding ``images/<book>/``).

    Writes:
        ``<root>/temp_uniq_file_name_mapping.json`` (image id -> file name)
        and ``<root_annotations>/instances_{train,val,test}.json``.

    Raises:
        KeyError: a required config key or page/annotation field is missing.
        OSError: an input file cannot be read or an output cannot be written.
    """
    base_dir = config["root"]
    image_dir = config["root_images"]
    annotations_dir = config["root_annotations"]
    source_images = os.path.join(config["manga109_root_dir"], "images")
    json_dir = os.path.join(base_dir, "json")

    category_map = {name: idx + 1 for idx, name in enumerate(_CATEGORY_NAMES)}
    categories = [
        {"id": cid, "name": name, "supercategory": "object"}
        for name, cid in category_map.items()
    ]
    datasets = {
        split: {
            "images": [],
            "annotations": [],
            "categories": copy.deepcopy(categories),
        }
        for split in _SPLITS
    }

    # Create the output directories up front so the first copy/write cannot
    # fail merely because a directory is missing.
    for split in _SPLITS:
        os.makedirs(os.path.join(image_dir, split), exist_ok=True)
    if annotations_dir:
        os.makedirs(annotations_dir, exist_ok=True)

    uniq_filename_mapping = {}
    uniq = 0  # running image id, shared by all books and all splits

    # Sorted so that image ids do not depend on the OS directory order.
    for entry in sorted(os.listdir(json_dir)):
        book, ext = os.path.splitext(entry)
        if ext.lower() != ".json":
            continue

        with open(os.path.join(json_dir, entry), "r", encoding="utf-8") as file:
            data = json.load(file)

        pages = _as_list(data["page"])
        split_of = _split_page_indices(len(pages) - 2)

        for page in pages:
            # The id advances for skipped pages too, so ids have gaps.
            uniq += 1
            img_id = int(page["@index"])
            if img_id in (0, 1):
                continue

            source_name = f"{str(img_id).zfill(3)}.jpg"
            dest_name = f"{book}_{source_name}"
            uniq_filename_mapping[uniq] = dest_name

            split = split_of.get(img_id)
            if split is None:
                continue

            coco = datasets[split]
            coco["images"].append({
                "id": uniq,
                "width": int(page["@width"]),
                "height": int(page["@height"]),
                "file_name": dest_name,
            })
            annotations = coco["annotations"]
            for category in _CATEGORY_NAMES:
                for ann in _as_list(page.get(category)):
                    # The list only grows, so its length + 1 yields ids that
                    # are unique and contiguous within each split file.
                    annotations.append(
                        _make_annotation(
                            ann,
                            category_map[category],
                            uniq,
                            len(annotations) + 1,
                        )
                    )

            shutil.copyfile(
                os.path.join(source_images, book, source_name),
                os.path.join(image_dir, split, dest_name),
            )

    mapping_path = os.path.join(base_dir, "temp_uniq_file_name_mapping.json")
    with open(mapping_path, "w", encoding="utf-8") as f:
        json.dump(uniq_filename_mapping, f, ensure_ascii=False, indent=2)

    for split in _SPLITS:
        out_path = os.path.join(annotations_dir, f"instances_{split}.json")
        with open(out_path, "w", encoding="utf-8") as f:
            json.dump(datasets[split], f, ensure_ascii=False, indent=2)