import copy
import json
import math
import os
import shutil
def json_to_coco(config):
    """Convert Manga109-style per-book JSON annotations into COCO train/val/test sets.

    For every ``<book>.json`` under ``config["root"]/json`` the usable pages
    (page indices 0 and 1 — presumably covers — are skipped; TODO confirm)
    are split roughly 8/1/1 by page index, with the highest-indexed pages
    going to test, the next band to val and the rest to train.  Page images
    are copied from ``config["manga109_root_dir"]/images/<book>/NNN.jpg``
    into ``config["root_images"]/{train,val,test}/<book>_NNN.jpg`` and three
    COCO annotation files (``instances_train.json`` etc.) are written to
    ``config["root_annotations"]``.  A ``temp_uniq_file_name_mapping.json``
    mapping unique image ids to renamed files is written under ``root``.

    Parameters
    ----------
    config : dict
        Must provide ``root``, ``root_images``, ``root_annotations`` and
        ``manga109_root_dir`` (all existing directories).

    Fixes over the original:
    - annotation ids now advance globally (the original re-passed ``ann_id``
      by value, so ids restarted at 1 on every page — duplicate COCO ids);
    - destination filenames use ``os.path.basename`` instead of
      ``split("\\\\")``, which only worked with Windows separators;
    - paths are built with ``os.path.join`` instead of concatenation;
    - dead ``*_output`` lists and in-loop imports removed;
    - books are processed in sorted order for deterministic output.
    """
    base_dir = config["root"]
    image_dir = config["root_images"]
    annotations_dir = config["root_annotations"]

    def create_coco(json_dir):
        # COCO category ids are 1-based by convention.
        category_names = ["body", "face", "frame", "text"]
        category_map = {name: idx + 1 for idx, name in enumerate(category_names)}
        template = {
            "images": [],
            "annotations": [],
            "categories": [
                {"id": cid, "name": name, "supercategory": "object"}
                for name, cid in category_map.items()
            ],
        }
        # Each split gets an independent deep copy of the skeleton.
        coco_train = copy.deepcopy(template)
        coco_val = copy.deepcopy(template)
        coco_test = copy.deepcopy(template)

        uniq_filename_mapping = {}
        uniq = 0    # globally unique image id across all books/pages
        ann_id = 1  # globally unique annotation id across all splits

        def create_annotation(ann, category_name, image_id, annotation_id):
            """Build one COCO annotation dict from a Manga109 bbox entry."""
            x_min = int(ann["@xmin"])
            y_min = int(ann["@ymin"])
            x_max = int(ann["@xmax"])
            y_max = int(ann["@ymax"])
            width = x_max - x_min
            height = y_max - y_min
            return {
                "id": annotation_id,
                "image_id": image_id,
                "category_id": category_map[category_name],
                "bbox": [x_min, y_min, width, height],
                "area": width * height,
                "iscrowd": 0,
            }

        def append_page(coco, book, page, img_id, image_id, next_ann_id):
            """Append one page image plus all its annotations to `coco`.

            Returns the next free annotation id so the caller can keep the
            counter advancing across pages and splits.
            """
            coco["images"].append({
                "id": image_id,
                "width": page["@width"],
                "height": page["@height"],
                "file_name": f"{book}_{str(img_id).zfill(3)}.jpg",
            })
            for category in category_names:
                # NOTE(review): assumes each category maps to a list of bbox
                # dicts; a lone dict (as some XML->JSON converters emit for a
                # single element) would iterate its keys — same as original.
                for ann in page.get(category, []):
                    coco["annotations"].append(
                        create_annotation(ann, category, image_id, next_ann_id)
                    )
                    next_ann_id += 1
            return next_ann_id

        for book_json in sorted(os.listdir(json_dir)):
            book = os.path.splitext(book_json)[0]
            with open(os.path.join(json_dir, book_json), "r", encoding="utf-8") as fh:
                data = json.load(fh)
            pages = data["page"]

            # Pages 0 and 1 are excluded everywhere, hence the -2.
            page_count = len(pages) - 2
            div = int(math.ceil(page_count / 10))

            # Walk indices from the back: last `div` usable pages -> test,
            # next `div` -> val, remainder -> train (~8/1/1 split).
            train_ids, val_ids, test_ids = set(), set(), set()
            for count, i in enumerate(range(page_count, 0, -1)):
                if count < div:
                    test_ids.add(i)
                elif count < 2 * div:
                    val_ids.add(i)
                else:
                    train_ids.add(i)

            for page in pages:
                # uniq intentionally advances for skipped pages too, matching
                # the original numbering scheme.
                uniq += 1
                img_id = int(page["@index"])
                if img_id in (0, 1):
                    continue

                src = os.path.join(
                    config["manga109_root_dir"], "images", book,
                    f"{str(img_id).zfill(3)}.jpg",
                )
                dst_name = f"{book}_{os.path.basename(src)}"
                uniq_filename_mapping[uniq] = dst_name

                if img_id in train_ids:
                    ann_id = append_page(coco_train, book, page, img_id, uniq, ann_id)
                    shutil.copyfile(src, os.path.join(image_dir, "train", dst_name))
                if img_id in val_ids:
                    ann_id = append_page(coco_val, book, page, img_id, uniq, ann_id)
                    shutil.copyfile(src, os.path.join(image_dir, "val", dst_name))
                if img_id in test_ids:
                    ann_id = append_page(coco_test, book, page, img_id, uniq, ann_id)
                    shutil.copyfile(src, os.path.join(image_dir, "test", dst_name))

        with open(os.path.join(base_dir, "temp_uniq_file_name_mapping.json"),
                  "w", encoding="utf-8") as f:
            json.dump(uniq_filename_mapping, f, ensure_ascii=False, indent=2)
        for split, coco in (("train", coco_train), ("val", coco_val), ("test", coco_test)):
            with open(os.path.join(annotations_dir, f"instances_{split}.json"),
                      "w", encoding="utf-8") as f:
                json.dump(coco, f, ensure_ascii=False, indent=2)

    create_coco(os.path.join(base_dir, "json"))