Spaces:

janmayjay
/

Sonofica

Running

App Files Files Community

Sonofica / utils /json_to_gcn.py

janmayjay

Add application file

39a7537 about 1 month ago

raw

history blame

20.8 kB

	"""
	Enhanced Manga Panel Dataset Converter: Extract frames as panels and prepare GCN-ready dataset.
	Crops each frame from manga pages and creates dataset with elements within each panel.
	"""

	import json
	import os
	import argparse
	from pathlib import Path
	from typing import Dict, List, Any, Optional, Tuple
	import difflib
	import math
	import cv2
	import numpy as np


	class MangaPanelDatasetConverter:
	    """Convert page-level manga annotations into a panel-level GCN dataset.

	    Every ``frame`` annotation of a page is treated as one panel. For each
	    panel the converter optionally crops the panel image out of the page
	    image, gathers the faces, bodies and text bubbles that lie mostly inside
	    the frame, re-expresses their boxes relative to the frame origin, and
	    links each bubble to a face. Running totals are kept in ``self.stats``.

	    Bounding boxes are ``[xmin, ymin, xmax, ymax]`` lists throughout.
	    """

	    def __init__(self, fuzzy_mapping: bool = True, proximity_threshold: float = 200.0,
	                 overlap_threshold: float = 0.5):
	        """Create a converter.

	        Args:
	            fuzzy_mapping: Stored on the instance; no method of this class
	                reads it.
	            proximity_threshold: Maximum centre-to-centre distance for the
	                spatial bubble-to-face fallback.
	            overlap_threshold: Minimum fraction of an element's area that must
	                lie inside a frame for the element to belong to that panel.
	        """
	        self.fuzzy_mapping = fuzzy_mapping
	        self.proximity_threshold = proximity_threshold
	        self.overlap_threshold = overlap_threshold
	        # Populated by convert_dataset with the most recent id -> name mapping.
	        self.character_mapping = {}
	        self.stats = {
	            'total_pages': 0,
	            'total_panels': 0,
	            'total_bubbles': 0,
	            'total_faces': 0,
	            'total_bodies': 0,
	            'successful_links': 0,
	            'failed_links': 0,
	            'unique_characters': set(),
	            'extracted_panels': 0
	        }

	    @staticmethod
	    def _as_list(value: Any) -> List[Any]:
	        """Return ``value`` as a list: ``None`` -> ``[]``, a bare item -> ``[item]``."""
	        if value is None:
	            return []
	        if isinstance(value, list):
	            return value
	        return [value]

	    @staticmethod
	    def _bbox_of(annotation: Dict) -> List[int]:
	        """Read ``[xmin, ymin, xmax, ymax]`` from an annotation's ``@``-prefixed keys.

	        String values are converted with ``int`` so that comparisons and
	        subtraction behave numerically; numeric values are passed through.

	        Raises:
	            KeyError: If one of the four coordinate keys is missing.
	        """
	        return [
	            int(value) if isinstance(value, str) else value
	            for value in (annotation['@xmin'], annotation['@ymin'],
	                          annotation['@xmax'], annotation['@ymax'])
	        ]

	    def load_manga_data(self, file_path: str) -> Dict[str, Any]:
	        """Load manga data from a UTF-8 JSON file."""
	        with open(file_path, 'r', encoding='utf-8') as f:
	            return json.load(f)

	    def build_character_mapping(self, character_list: List[Dict]) -> Dict[str, str]:
	        """Build a character-ID -> name mapping.

	        Entries whose ``@id`` or ``@name`` is missing or empty are skipped.
	        """
	        mapping = {}
	        for char in character_list:
	            mapping_id = char.get('@id', '')
	            char_name = char.get('@name', '')
	            if mapping_id and char_name:
	                mapping[mapping_id] = char_name
	        return mapping

	    def check_overlap(self, bbox1: List[int], bbox2: List[int]) -> bool:
	        """Return True when the two boxes share a region of positive area.

	        Boxes that merely touch along an edge do not count as overlapping.
	        """
	        x1_min, y1_min, x1_max, y1_max = bbox1
	        x2_min, y2_min, x2_max, y2_max = bbox2

	        return not (x1_max <= x2_min or x2_max <= x1_min or
	                    y1_max <= y2_min or y2_max <= y1_min)

	    def is_element_in_frame(self, element_bbox: List[int], frame_bbox: List[int],
	                            overlap_threshold: float = 0.5) -> bool:
	        """Check whether an element lies (mostly) inside a frame.

	        Args:
	            element_bbox: [xmin, ymin, xmax, ymax] of the element.
	            frame_bbox: [xmin, ymin, xmax, ymax] of the frame.
	            overlap_threshold: Minimum ratio of intersection area to element
	                area for the element to count as belonging to the frame.

	        Returns:
	            True when the ratio reaches ``overlap_threshold``; False when the
	            boxes do not intersect with positive area or the ratio is lower.
	        """
	        inter_width = min(element_bbox[2], frame_bbox[2]) - max(element_bbox[0], frame_bbox[0])
	        inter_height = min(element_bbox[3], frame_bbox[3]) - max(element_bbox[1], frame_bbox[1])
	        if inter_width <= 0 or inter_height <= 0:
	            return False

	        # A positive intersection implies the element itself has positive
	        # width and height, so the division below cannot hit zero.
	        element_area = (element_bbox[2] - element_bbox[0]) * (element_bbox[3] - element_bbox[1])
	        return (inter_width * inter_height) / element_area >= overlap_threshold

	    def adjust_coordinates_to_frame(self, element_bbox: List[int], frame_bbox: List[int]) -> List[int]:
	        """Translate an element box so it is relative to the frame's top-left corner.

	        Coordinates that would become negative are clamped to 0; the
	        right/bottom sides are not clamped to the frame size.
	        """
	        frame_x_min, frame_y_min = frame_bbox[0], frame_bbox[1]
	        return [
	            max(0, element_bbox[0] - frame_x_min),  # xmin
	            max(0, element_bbox[1] - frame_y_min),  # ymin
	            max(0, element_bbox[2] - frame_x_min),  # xmax
	            max(0, element_bbox[3] - frame_y_min),  # ymax
	        ]

	    def extract_panel_from_page(self, image_path: str, frame_data: Dict,
	                                output_dir: str, page_index: int, frame_index: int) -> Optional[str]:
	        """Crop a single panel (frame) out of a page image and save it as JPEG.

	        Args:
	            image_path: Path to the manga page image.
	            frame_data: Frame annotation; missing coordinates default to the
	                full image extent.
	            output_dir: Directory to save the extracted panel in.
	            page_index: Index of the current page (used in the file name).
	            frame_index: Index of the frame within the page (used in the file name).

	        Returns:
	            Path to the saved panel image, or None when the page could not be
	            loaded, the frame is empty after clipping, or saving failed.
	        """
	        image = cv2.imread(image_path)
	        if image is None:
	            print(f"Warning: Could not load image {image_path}")
	            return None

	        try:
	            image_height, image_width = image.shape[0], image.shape[1]

	            # Clip the frame to the image so the slice below is always valid.
	            xmin = max(0, int(frame_data.get('@xmin', 0)))
	            ymin = max(0, int(frame_data.get('@ymin', 0)))
	            xmax = min(image_width, int(frame_data.get('@xmax', image_width)))
	            ymax = min(image_height, int(frame_data.get('@ymax', image_height)))

	            if xmax <= xmin or ymax <= ymin:
	                print(f"Warning: Invalid frame coordinates for page {page_index}, frame {frame_index}")
	                return None

	            cropped_panel = image[ymin:ymax, xmin:xmax]

	            panel_filename = f"page_{page_index:04d}_panel_{frame_index:03d}.jpg"
	            panel_path = os.path.join(output_dir, panel_filename)

	            if cv2.imwrite(panel_path, cropped_panel):
	                self.stats['extracted_panels'] += 1
	                return panel_path

	            print(f"Warning: Failed to save panel {panel_path}")
	            return None

	        except Exception as e:
	            # Best effort: one bad frame must not abort the whole conversion.
	            print(f"Error extracting panel from page {page_index}, frame {frame_index}: {str(e)}")
	            return None

	    def calculate_distance(self, bbox1: List[int], bbox2: List[int]) -> float:
	        """Calculate the Euclidean distance between the centres of two boxes."""
	        cx1, cy1 = (bbox1[0] + bbox1[2]) / 2, (bbox1[1] + bbox1[3]) / 2
	        cx2, cy2 = (bbox2[0] + bbox2[2]) / 2, (bbox2[1] + bbox2[3]) / 2
	        return math.hypot(cx1 - cx2, cy1 - cy2)

	    def find_character_by_body_id(self, body_id: str, bodies: List[Dict]) -> Optional[str]:
	        """Return the ``@character`` of the first body whose ``@id`` equals ``body_id``."""
	        for body in bodies:
	            if body.get('@id') == body_id:
	                return body.get('@character')
	        return None

	    def find_face_by_character(self, character_id: str, faces: List[Dict]) -> Optional[int]:
	        """Return the index of the first face whose ``@character`` equals ``character_id``."""
	        for i, face in enumerate(faces):
	            if face.get('@character') == character_id:
	                return i
	        return None

	    def find_closest_face(self, bubble_bbox: List[int], faces: List[Dict]) -> Optional[int]:
	        """Return the index of the nearest face within ``proximity_threshold``.

	        Distance is measured between box centres; each face must carry a
	        ``'bbox'`` entry. Returns None when there are no faces or none is
	        close enough.
	        """
	        min_distance = float('inf')
	        closest_face_idx = None

	        for i, face in enumerate(faces):
	            distance = self.calculate_distance(bubble_bbox, face['bbox'][:4])
	            if distance < min_distance and distance <= self.proximity_threshold:
	                min_distance = distance
	                closest_face_idx = i

	        return closest_face_idx

	    def associate_bubble_to_face(self, text_item: Dict, faces: List[Dict],
	                                 bodies: List[Dict], original_bodies: List[Dict]) -> Optional[int]:
	        """Associate a speech bubble with a face, trying three strategies in order.

	        Args:
	            text_item: Bubble entry with ``'bbox'`` and optional ``'body_ref'``.
	            faces: Candidate faces of the panel.
	            bodies: Panel bodies; accepted for interface compatibility, unused.
	            original_bodies: Page-level body annotations used to resolve
	                ``body_ref`` to a character ID.

	        Returns:
	            Index into ``faces`` of the chosen face, or None if none fits.
	        """
	        # Strategy 1: bubble -> body -> character -> face.
	        body_id = text_item.get('body_ref')
	        if body_id:
	            character_id = self.find_character_by_body_id(body_id, original_bodies)
	            if character_id:
	                face_idx = self.find_face_by_character(character_id, faces)
	                if face_idx is not None:
	                    return face_idx

	        # Strategy 2: nearest face within the proximity threshold.
	        closest_face_idx = self.find_closest_face(text_item['bbox'], faces)
	        if closest_face_idx is not None:
	            return closest_face_idx

	        # Strategy 3: a lone face is taken as the speaker regardless of distance.
	        if len(faces) == 1:
	            return 0

	        return None

	    def _character_entries(self, annotations: List[Dict], frame_bbox: List[int],
	                           character_mapping: Dict[str, str], id_key: str) -> List[Dict[str, Any]]:
	        """Build panel entries for the face/body annotations that fall inside the frame."""
	        entries = []
	        for annotation in annotations:
	            bbox = self._bbox_of(annotation)
	            if not self.is_element_in_frame(bbox, frame_bbox, self.overlap_threshold):
	                continue
	            character_id = annotation.get('@character', '')
	            entries.append({
	                id_key: len(entries),
	                "bbox": self.adjust_coordinates_to_frame(bbox, frame_bbox),
	                "original_id": annotation.get('@id', ''),
	                "attributes": {
	                    "character_id": character_id,
	                    # Fall back to the raw ID when no name is known.
	                    "character_name": character_mapping.get(character_id, character_id)
	                }
	            })
	        return entries

	    def process_panel(self, page_data: Dict, frame_data: Dict, page_index: int,
	                      frame_index: int, character_mapping: Dict[str, str],
	                      image_path: Optional[str] = None,
	                      panels_output_dir: Optional[str] = None) -> Dict[str, Any]:
	        """Process a single panel (frame) and create its GCN dataset entry.

	        Args:
	            page_data: Page annotation holding ``'face'``, ``'body'`` and
	                ``'text'`` annotations (each a list, a single dict, or absent).
	            frame_data: Frame annotation with ``@xmin/@ymin/@xmax/@ymax``.
	            page_index: Index of the page, used in the panel ID.
	            frame_index: Index of the frame within the page.
	            character_mapping: Character ID -> name mapping.
	            image_path: Page image path; the panel image is cropped only when
	                both this and ``panels_output_dir`` are given.
	            panels_output_dir: Directory for cropped panel images.

	        Returns:
	            The panel entry (ID, size, image path, bubbles, faces, bodies,
	            bubble-to-face links and metadata).

	        Raises:
	            KeyError: If a frame or element annotation lacks a coordinate key.
	        """
	        panel_image_path = None
	        if image_path and panels_output_dir:
	            panel_image_path = self.extract_panel_from_page(
	                image_path, frame_data, panels_output_dir, page_index, frame_index
	            )

	        frame_bbox = self._bbox_of(frame_data)
	        original_bodies = self._as_list(page_data.get('body', []))

	        panel_faces = self._character_entries(
	            self._as_list(page_data.get('face', [])), frame_bbox, character_mapping, "face_id"
	        )
	        panel_bodies = self._character_entries(
	            original_bodies, frame_bbox, character_mapping, "body_id"
	        )

	        # Only characters seen on faces count towards the unique-character stat.
	        for face in panel_faces:
	            character_id = face["attributes"]["character_id"]
	            if character_id:
	                self.stats['unique_characters'].add(character_id)

	        panel_bubbles = []
	        for text in self._as_list(page_data.get('text', [])):
	            text_bbox = self._bbox_of(text)
	            if not self.is_element_in_frame(text_bbox, frame_bbox, self.overlap_threshold):
	                continue
	            panel_bubbles.append({
	                "bubble_id": len(panel_bubbles),
	                "bbox": self.adjust_coordinates_to_frame(text_bbox, frame_bbox),
	                "text": text.get('#text', ''),
	                "original_id": text.get('@id', ''),
	                "body_ref": text.get('@bodyid', '')
	            })

	        links = []
	        for bubble in panel_bubbles:
	            face_idx = self.associate_bubble_to_face(
	                bubble, panel_faces, panel_bodies, original_bodies
	            )
	            if face_idx is None:
	                self.stats['failed_links'] += 1
	                continue
	            # face_idx indexes panel_faces, whose face_id values are 0..n-1.
	            links.append({
	                "bubble_id": bubble['bubble_id'],
	                "face_id": face_idx
	            })
	            self.stats['successful_links'] += 1

	        self.stats['total_panels'] += 1
	        self.stats['total_bubbles'] += len(panel_bubbles)
	        self.stats['total_faces'] += len(panel_faces)
	        self.stats['total_bodies'] += len(panel_bodies)

	        return {
	            "panel_id": f"page_{page_index:04d}_panel_{frame_index:03d}",
	            "width": frame_bbox[2] - frame_bbox[0],
	            "height": frame_bbox[3] - frame_bbox[1],
	            "panel_image_path": panel_image_path,
	            "bubbles": panel_bubbles,
	            "faces": panel_faces,
	            "bodies": panel_bodies,
	            "links": links,
	            "metadata": {
	                "original_page_index": page_index,
	                "original_frame_index": frame_index,
	                "original_frame_id": frame_data.get('@id', ''),
	                "original_frame_bbox": frame_bbox
	            }
	        }

	    def convert_dataset(self, input_file: str, output_file: str,
	                        image_dir: Optional[str] = None,
	                        panels_output_dir: Optional[str] = None) -> Dict[str, Any]:
	        """Convert one manga annotation file to the panel-based GCN format.

	        Args:
	            input_file: JSON file with ``title``, ``character`` and ``page`` data.
	            output_file: Path of the JSON dataset to write; its parent
	                directory is created if needed.
	            image_dir: Directory with page images named ``<index>.jpg``
	                (index zero-padded to three digits). Panel images are only
	                cropped when this and ``panels_output_dir`` are both given.
	            panels_output_dir: Directory for cropped panel images; created if
	                missing.

	        Returns:
	            The dataset dictionary that was written to ``output_file``.
	        """
	        print(f"Loading manga data from: {input_file}")
	        manga_data = self.load_manga_data(input_file)

	        title = manga_data.get('title', 'Unknown')
	        character_mapping = self.build_character_mapping(
	            self._as_list(manga_data.get('character', []))
	        )
	        self.character_mapping = character_mapping

	        print(f"Title: {title}")
	        print(f"Characters found: {len(character_mapping)}")

	        if panels_output_dir:
	            os.makedirs(panels_output_dir, exist_ok=True)
	            print(f"Panel extraction enabled. Output directory: {panels_output_dir}")

	        converted_panels = []
	        for page_idx, page_data in enumerate(self._as_list(manga_data.get('page', []))):
	            self.stats['total_pages'] += 1

	            # Without an image directory there is nothing to crop from.
	            image_path = None
	            if image_dir:
	                page_number = str(page_data.get('@index', page_idx)).zfill(3)
	                image_path = os.path.join(image_dir, f"{page_number}.jpg")

	            for frame_idx, frame_data in enumerate(self._as_list(page_data.get('frame', []))):
	                converted_panels.append(self.process_panel(
	                    page_data, frame_data, page_idx, frame_idx,
	                    character_mapping, image_path, panels_output_dir
	                ))

	        dataset = {
	            "title": title,
	            "character_mapping": character_mapping,
	            "panels": converted_panels,
	            "conversion_stats": {
	                "total_pages": self.stats['total_pages'],
	                "total_panels": self.stats['total_panels'],
	                "total_bubbles": self.stats['total_bubbles'],
	                "total_faces": self.stats['total_faces'],
	                "total_bodies": self.stats['total_bodies'],
	                "successful_links": self.stats['successful_links'],
	                "failed_links": self.stats['failed_links'],
	                "extracted_panels": self.stats['extracted_panels'],
	                "unique_characters": len(self.stats['unique_characters']),
	                # max(1, ...) avoids dividing by zero when no bubbles exist.
	                "link_success_rate": (self.stats['successful_links'] /
	                                      max(1, self.stats['total_bubbles'])) * 100
	            }
	        }

	        print(f"Saving converted dataset to: {output_file}")
	        output_parent = os.path.dirname(output_file)
	        if output_parent:
	            os.makedirs(output_parent, exist_ok=True)
	        with open(output_file, 'w', encoding='utf-8') as f:
	            json.dump(dataset, f, ensure_ascii=False, indent=2)

	        self.print_conversion_stats()

	        return dataset

	    def print_conversion_stats(self):
	        """Print the accumulated conversion statistics to stdout."""
	        print("\n=== Conversion Statistics ===")
	        print(f"Total pages processed: {self.stats['total_pages']}")
	        print(f"Total panels extracted: {self.stats['total_panels']}")
	        print(f"Total speech bubbles: {self.stats['total_bubbles']}")
	        print(f"Total faces: {self.stats['total_faces']}")
	        print(f"Total bodies: {self.stats['total_bodies']}")
	        print(f"Successful links: {self.stats['successful_links']}")
	        print(f"Failed links: {self.stats['failed_links']}")
	        print(f"Panel images extracted: {self.stats['extracted_panels']}")
	        print(f"Unique characters: {len(self.stats['unique_characters'])}")
	        if self.stats['total_bubbles'] > 0:
	            success_rate = (self.stats['successful_links'] / self.stats['total_bubbles']) * 100
	            print(f"Link success rate: {success_rate:.1f}%")
	        print("=" * 30)


	def json_to_gcn(config):
	    """Convert every annotation JSON file in a directory to the panel-based GCN format.

	    For each ``<book>.json`` file found directly inside ``config["root_json"]``
	    a fresh ``MangaPanelDatasetConverter`` is run with:

	    * input: ``<root_json>/<book>.json``
	    * output dataset: ``<root>/panel_data/<book>.json``
	    * page images: ``<manga109_root_dir>/images/<book>``
	    * cropped panels: ``<root>/panels/<book>``

	    Output directories are created when missing. Entries that are not regular
	    ``.json`` files are skipped. A failure while converting one book is
	    reported on stdout and does not stop the remaining books.

	    Args:
	        config: Mapping providing the keys ``"root"``, ``"manga109_root_dir"``
	            and ``"root_json"``.

	    Raises:
	        KeyError: If one of the required config keys is missing.
	    """
	    base_dir = config["root"]
	    manga109_root_dir = config["manga109_root_dir"]
	    root_json = config["root_json"]

	    # The converter writes its dataset here; create it once up front.
	    panel_data_dir = os.path.join(base_dir, "panel_data")
	    os.makedirs(panel_data_dir, exist_ok=True)

	    # Sorted so books are processed in a reproducible order.
	    for book_json in sorted(os.listdir(root_json)):
	        input_file = os.path.join(root_json, book_json)
	        # splitext keeps dots that are part of the book name itself.
	        book_name, extension = os.path.splitext(str(book_json))
	        if extension.lower() != ".json" or not os.path.isfile(input_file):
	            continue

	        panel_dir = os.path.join(base_dir, "panels", book_name)
	        os.makedirs(panel_dir, exist_ok=True)

	        # One converter per book so statistics do not leak between books.
	        converter = MangaPanelDatasetConverter()
	        output_file = os.path.join(panel_data_dir, book_json)
	        image_dir = os.path.join(manga109_root_dir, "images", book_name)
	        try:
	            converter.convert_dataset(
	                input_file,
	                output_file,
	                image_dir,
	                panel_dir
	            )
	        except Exception as e:
	            # Batch boundary: report and carry on with the next book.
	            print(f"Error during conversion: {str(e)}")
	        else:
	            print("\nConversion completed successfully!")
	            print(f"GCN dataset saved to: {output_file}")
	            print(f"Panel images saved to: {panel_dir}")