MMOCR

Runtime error

App Files Files Community

MMOCR / tests /test_models /test_detector.py

tomofi

Add application file

2366e36 over 3 years ago

raw

history blame

16.9 kB

	# Copyright (c) OpenMMLab. All rights reserved.
	"""pytest tests/test_models/test_detector.py."""
	import copy
	import tempfile
	from functools import partial
	from os.path import dirname, exists, join

	import numpy as np
	import pytest
	import torch

	from mmocr.utils import revert_sync_batchnorm


	def _demo_mm_inputs(num_kernels=0, input_shape=(1, 3, 300, 300),
	                    num_items=None, num_classes=1):  # yapf: disable
	    """Create a superset of inputs needed to run test or train batches.

	    Every random quantity is drawn from one ``RandomState`` seeded with 0,
	    so repeated calls with the same arguments produce the same inputs.

	    Args:
	        num_kernels (int): Number of random ``(H, W)`` kernel maps created
	            for each batch item.
	        input_shape (tuple): Input batch dimensions, unpacked as
	            ``(N, C, H, W)``.
	        num_items (None | list[int]): Specifies the number of boxes
	            for each batch item. When None, a count in ``[1, 10)`` is
	            drawn for every item.
	        num_classes (int): Number of distinct labels a box might have.
	            Currently unused: every box is given label 0.

	    Returns:
	        dict: Keys ``imgs``, ``img_metas``, ``gt_bboxes``, ``gt_labels``,
	        ``gt_bboxes_ignore``, ``gt_masks``, ``gt_kernels``, ``gt_mask``,
	        ``gt_thr_mask``, ``gt_text_mask``, ``gt_center_region_mask``,
	        ``gt_radius_map``, ``gt_sin_map`` and ``gt_cos_map``.
	        ``gt_mask``, ``gt_thr_mask``, ``gt_text_mask`` and
	        ``gt_center_region_mask`` all reference the same list;
	        ``gt_kernels``, ``gt_radius_map``, ``gt_sin_map`` and
	        ``gt_cos_map`` likewise share one list.
	    """
	    from mmdet.core import BitmapMasks

	    (N, C, H, W) = input_shape

	    # Single seeded generator: all draws below go through it so the
	    # generated batch is reproducible.
	    rng = np.random.RandomState(0)

	    imgs = rng.rand(*input_shape)

	    img_metas = [{
	        'img_shape': (H, W, C),
	        'ori_shape': (H, W, C),
	        'pad_shape': (H, W, C),
	        'filename': '<demo>.png',
	        'scale_factor': np.array([1, 1, 1, 1]),
	        'flip': False,
	    } for _ in range(N)]

	    gt_bboxes = []
	    gt_labels = []
	    gt_masks = []
	    gt_kernels = []
	    gt_effective_mask = []

	    for batch_idx in range(N):
	        if num_items is None:
	            num_boxes = rng.randint(1, 10)
	        else:
	            num_boxes = num_items[batch_idx]

	        # Relative centre / size in [0, 1), scaled to pixels and clipped
	        # to the image extent.
	        cx, cy, bw, bh = rng.rand(num_boxes, 4).T

	        tl_x = ((cx * W) - (W * bw / 2)).clip(0, W)
	        tl_y = ((cy * H) - (H * bh / 2)).clip(0, H)
	        br_x = ((cx * W) + (W * bw / 2)).clip(0, W)
	        br_y = ((cy * H) + (H * bh / 2)).clip(0, H)

	        boxes = np.vstack([tl_x, tl_y, br_x, br_y]).T
	        class_idxs = [0] * num_boxes

	        gt_bboxes.append(torch.FloatTensor(boxes))
	        gt_labels.append(torch.LongTensor(class_idxs))

	        kernels = [rng.rand(H, W) for _ in range(num_kernels)]
	        gt_kernels.append(BitmapMasks(kernels, H, W))
	        # One all-ones (H, W) mask per batch item.
	        gt_effective_mask.append(BitmapMasks([np.ones((H, W))], H, W))

	        # One random binary mask per box of this batch item.
	        mask = rng.randint(0, 2, (len(boxes), H, W), dtype=np.uint8)
	        gt_masks.append(BitmapMasks(mask, H, W))

	    mm_inputs = {
	        'imgs': torch.FloatTensor(imgs).requires_grad_(True),
	        'img_metas': img_metas,
	        'gt_bboxes': gt_bboxes,
	        'gt_labels': gt_labels,
	        'gt_bboxes_ignore': None,
	        'gt_masks': gt_masks,
	        'gt_kernels': gt_kernels,
	        'gt_mask': gt_effective_mask,
	        'gt_thr_mask': gt_effective_mask,
	        'gt_text_mask': gt_effective_mask,
	        'gt_center_region_mask': gt_effective_mask,
	        'gt_radius_map': gt_kernels,
	        'gt_sin_map': gt_kernels,
	        'gt_cos_map': gt_kernels,
	    }
	    return mm_inputs


	def _get_config_directory():
	"""Find the predefined detector config directory."""
	try:
	# Assume we are running in the source mmocr repo
	repo_dpath = dirname(dirname(dirname(__file__)))
	except NameError:
	# For IPython development when this __file__ is not defined
	import mmocr
	repo_dpath = dirname(dirname(mmocr.__file__))
	config_dpath = join(repo_dpath, 'configs')
	if not exists(config_dpath):
	raise Exception('Cannot find config path')
	return config_dpath


	def _get_config_module(fname):
	    """Load the config file ``fname`` from the config directory.

	    Args:
	        fname (str): Config file path, joined onto the directory returned
	            by ``_get_config_directory``.

	    Returns:
	        The object produced by ``mmcv.Config.fromfile`` for that file.
	    """
	    from mmcv import Config
	    return Config.fromfile(join(_get_config_directory(), fname))


	def _get_detector_cfg(fname):
	    """Return a private copy of the ``model`` section of a config file.

	    The section is deep copied, so a test may edit the returned object
	    freely without affecting what other tests load.

	    Args:
	        fname (str): Config file path, forwarded to ``_get_config_module``.
	    """
	    return copy.deepcopy(_get_config_module(fname).model)


	@pytest.mark.parametrize('cfg_file', [
	    'textdet/maskrcnn/mask_rcnn_r50_fpn_160e_ctw1500.py',
	    'textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2015.py',
	    'textdet/maskrcnn/mask_rcnn_r50_fpn_160e_icdar2017.py'
	])
	def test_ocr_mask_rcnn(cfg_file):
	    """Run train forward, test forward and show_result for Mask R-CNN."""
	    model_cfg = _get_detector_cfg(cfg_file)
	    model_cfg['pretrained'] = None

	    from mmocr.models import build_detector
	    detector = build_detector(model_cfg)

	    demo = _demo_mm_inputs(0, (1, 3, 224, 224))
	    imgs = demo.pop('imgs')
	    img_metas = demo.pop('img_metas')

	    # Training-mode forward: the result must be a dict.
	    train_targets = {
	        'gt_bboxes': demo['gt_bboxes'],
	        'gt_labels': demo.pop('gt_labels'),
	        'gt_masks': demo.pop('gt_masks'),
	    }
	    losses = detector.forward(imgs, img_metas, **train_targets)
	    assert isinstance(losses, dict)

	    # Test-mode forward, one image (with a leading batch axis) at a time.
	    with torch.no_grad():
	        img_list = [single[None, :] for single in imgs]
	        batch_results = [
	            detector.forward([one_img], [[one_meta]], return_loss=False)
	            for one_img, one_meta in zip(img_list, img_metas)
	        ]

	    # show_result on a random 5x5 image and one hand-written boundary.
	    dummy_results = {'boundary_result': [[0, 0, 1, 0, 1, 1, 0, 1, 0.9]]}
	    dummy_img = np.random.rand(5, 5)
	    detector.show_result(dummy_img, dummy_results)


	@pytest.mark.parametrize('cfg_file', [
	    'textdet/panet/panet_r18_fpem_ffm_600e_ctw1500.py',
	    'textdet/panet/panet_r18_fpem_ffm_600e_icdar2015.py',
	    'textdet/panet/panet_r50_fpem_ffm_600e_icdar2017.py'
	])
	def test_panet(cfg_file):
	    """Run train/test forward, ONNX export and show_result for PANet."""
	    model_cfg = _get_detector_cfg(cfg_file)
	    model_cfg['pretrained'] = None

	    from mmocr.models import build_detector
	    detector = revert_sync_batchnorm(build_detector(model_cfg))

	    # Two kernel maps per image.
	    demo = _demo_mm_inputs(2, (1, 3, 224, 224))
	    imgs = demo.pop('imgs')
	    img_metas = demo.pop('img_metas')
	    train_targets = {
	        'gt_kernels': demo.pop('gt_kernels'),
	        'gt_mask': demo.pop('gt_mask'),
	    }

	    # Training-mode forward: the result must be a dict.
	    losses = detector.forward(imgs, img_metas, **train_targets)
	    assert isinstance(losses, dict)

	    # Test-mode forward, one image (with a leading batch axis) at a time.
	    with torch.no_grad():
	        img_list = [single[None, :] for single in imgs]
	        batch_results = [
	            detector.forward([one_img], [[one_meta]], return_loss=False)
	            for one_img, one_meta in zip(img_list, img_metas)
	        ]

	    # ONNX export: forward is rebound to simple_test with the metas and
	    # rescale flag fixed, so the exporter only has to supply the image.
	    detector.forward = partial(
	        detector.simple_test, img_metas=img_metas, rescale=True)
	    with tempfile.TemporaryDirectory() as tmp_dir:
	        torch.onnx.export(
	            detector, (img_list[0], ),
	            f'{tmp_dir}/tmp.onnx',
	            input_names=['input'],
	            output_names=['output'],
	            export_params=True,
	            keep_initializers_as_inputs=False)

	    # show_result on a random 5x5 image and one hand-written boundary.
	    dummy_results = {'boundary_result': [[0, 0, 1, 0, 1, 1, 0, 1, 0.9]]}
	    dummy_img = np.random.rand(5, 5)
	    detector.show_result(dummy_img, dummy_results)


	@pytest.mark.parametrize('cfg_file', [
	    'textdet/psenet/psenet_r50_fpnf_600e_icdar2015.py',
	    'textdet/psenet/psenet_r50_fpnf_600e_icdar2017.py',
	    'textdet/psenet/psenet_r50_fpnf_600e_ctw1500.py'
	])
	def test_psenet(cfg_file):
	    """Run train forward, test forward and show_result for PSENet."""
	    model_cfg = _get_detector_cfg(cfg_file)
	    model_cfg['pretrained'] = None

	    from mmocr.models import build_detector
	    detector = revert_sync_batchnorm(build_detector(model_cfg))

	    # Seven kernel maps per image.
	    demo = _demo_mm_inputs(7, (1, 3, 224, 224))
	    imgs = demo.pop('imgs')
	    img_metas = demo.pop('img_metas')
	    train_targets = {
	        'gt_kernels': demo.pop('gt_kernels'),
	        'gt_mask': demo.pop('gt_mask'),
	    }

	    # Training-mode forward: the result must be a dict.
	    losses = detector.forward(imgs, img_metas, **train_targets)
	    assert isinstance(losses, dict)

	    # Test-mode forward, one image (with a leading batch axis) at a time.
	    with torch.no_grad():
	        img_list = [single[None, :] for single in imgs]
	        batch_results = [
	            detector.forward([one_img], [[one_meta]], return_loss=False)
	            for one_img, one_meta in zip(img_list, img_metas)
	        ]

	    # show_result on a random 5x5 image and one hand-written boundary.
	    dummy_results = {'boundary_result': [[0, 0, 1, 0, 1, 1, 0, 1, 0.9]]}
	    dummy_img = np.random.rand(5, 5)
	    detector.show_result(dummy_img, dummy_results)


	@pytest.mark.skipif(not torch.cuda.is_available(), reason='requires cuda')
	@pytest.mark.parametrize('cfg_file', [
	    'textdet/dbnet/dbnet_r18_fpnc_1200e_icdar2015.py',
	    'textdet/dbnet/dbnet_r50dcnv2_fpnc_1200e_icdar2015.py'
	])
	def test_dbnet(cfg_file):
	    """Run train forward, test forward and show_result for DBNet on CUDA."""
	    model_cfg = _get_detector_cfg(cfg_file)
	    model_cfg['pretrained'] = None

	    from mmocr.models import build_detector
	    detector = revert_sync_batchnorm(build_detector(model_cfg)).cuda()

	    demo = _demo_mm_inputs(7, (1, 3, 224, 224))
	    imgs = demo.pop('imgs').cuda()
	    img_metas = demo.pop('img_metas')

	    # The generic demo entries are passed under the keyword names used
	    # in this forward call.
	    train_targets = {
	        'gt_shrink': demo.pop('gt_kernels'),
	        'gt_shrink_mask': demo.pop('gt_mask'),
	        'gt_thr': demo.pop('gt_masks'),
	        'gt_thr_mask': demo.pop('gt_thr_mask'),
	    }

	    # Training-mode forward: the result must be a dict.
	    losses = detector.forward(imgs, img_metas, **train_targets)
	    assert isinstance(losses, dict)

	    # Test-mode forward, one image (with a leading batch axis) at a time.
	    with torch.no_grad():
	        img_list = [single[None, :] for single in imgs]
	        batch_results = [
	            detector.forward([one_img], [[one_meta]], return_loss=False)
	            for one_img, one_meta in zip(img_list, img_metas)
	        ]

	    # show_result on a random 5x5 image and one hand-written boundary.
	    dummy_results = {'boundary_result': [[0, 0, 1, 0, 1, 1, 0, 1, 0.9]]}
	    dummy_img = np.random.rand(5, 5)
	    detector.show_result(dummy_img, dummy_results)


	@pytest.mark.parametrize(
	    'cfg_file',
	    ['textdet/textsnake/'
	     'textsnake_r50_fpn_unet_1200e_ctw1500.py'])
	def test_textsnake(cfg_file):
	    """Run train forward, get_boundary and show_result for TextSnake."""
	    model_cfg = _get_detector_cfg(cfg_file)
	    model_cfg['pretrained'] = None

	    from mmocr.models import build_detector
	    detector = revert_sync_batchnorm(build_detector(model_cfg))

	    demo = _demo_mm_inputs(1, (1, 3, 224, 224))
	    imgs = demo.pop('imgs')
	    img_metas = demo.pop('img_metas')
	    target_keys = ('gt_text_mask', 'gt_center_region_mask', 'gt_mask',
	                   'gt_radius_map', 'gt_sin_map', 'gt_cos_map')
	    train_targets = {key: demo.pop(key) for key in target_keys}

	    # Training-mode forward: the result must be a dict.
	    losses = detector.forward(imgs, img_metas, **train_targets)
	    assert isinstance(losses, dict)

	    # Hand-built 5-channel prediction map: channels 0 and 1 are -10
	    # everywhere except one rectangle each set to 10; channels 2-4 get
	    # constant values inside the channel-1 rectangle.
	    maps = torch.zeros((1, 5, 224, 224), dtype=torch.float)
	    maps[:, 0:2, :, :] = -10.
	    maps[:, 0, 60:100, 12:212] = 10.
	    for channel, value in ((1, 10.), (2, 0.), (3, 1.), (4, 10.)):
	        maps[:, channel, 70:90, 22:202] = value

	    result = detector.bbox_head.get_boundary(maps, [img_metas[0]], False)
	    assert 'boundary_result' in result
	    assert 'filename' in result

	    # show_result on a random 5x5 image and one hand-written boundary.
	    dummy_results = {'boundary_result': [[0, 0, 1, 0, 1, 1, 0, 1, 0.9]]}
	    dummy_img = np.random.rand(5, 5)
	    detector.show_result(dummy_img, dummy_results)


	@pytest.mark.skipif(not torch.cuda.is_available(), reason='requires cuda')
	@pytest.mark.parametrize('cfg_file', [
	    'textdet/fcenet/fcenet_r50dcnv2_fpn_1500e_ctw1500.py',
	    'textdet/fcenet/fcenet_r50_fpn_1500e_icdar2015.py'
	])
	def test_fcenet(cfg_file):
	    """Run train forward, test forward and show_result for FCENet on CUDA."""
	    model_cfg = _get_detector_cfg(cfg_file)
	    model_cfg['pretrained'] = None

	    from mmocr.models import build_detector
	    detector = revert_sync_batchnorm(build_detector(model_cfg)).cuda()

	    fourier_degree = 5
	    n, c, h, w = (1, 3, 256, 256)

	    imgs = torch.randn(n, c, h, w).float().cuda()
	    img_metas = [{
	        'img_shape': (h, w, c),
	        'ori_shape': (h, w, c),
	        'pad_shape': (h, w, c),
	        'filename': '<demo>.png',
	        'scale_factor': np.array([1, 1, 1, 1]),
	        'flip': False,
	    } for _ in range(n)]

	    # Random target maps at 1/8, 1/16 and 1/32 of the input size, each
	    # with 5 + 4 * fourier_degree channels.
	    num_channels = 5 + 4 * fourier_degree
	    p3_maps, p4_maps, p5_maps = [], [], []
	    for _ in range(n):
	        for level_maps, stride in ((p3_maps, 8), (p4_maps, 16),
	                                   (p5_maps, 32)):
	            level_maps.append(
	                np.random.random((num_channels, h // stride, w // stride)))

	    # Training-mode forward: the result must be a dict.
	    losses = detector.forward(
	        imgs, img_metas, p3_maps=p3_maps, p4_maps=p4_maps, p5_maps=p5_maps)
	    assert isinstance(losses, dict)

	    # Test-mode forward, one image (with a leading batch axis) at a time.
	    with torch.no_grad():
	        img_list = [single[None, :] for single in imgs]
	        batch_results = [
	            detector.forward([one_img], [[one_meta]], return_loss=False)
	            for one_img, one_meta in zip(img_list, img_metas)
	        ]

	    # show_result on a random 5x5 image and one hand-written boundary.
	    dummy_results = {'boundary_result': [[0, 0, 1, 0, 1, 1, 0, 1, 0.9]]}
	    dummy_img = np.random.rand(5, 5)
	    detector.show_result(dummy_img, dummy_results)


	@pytest.mark.parametrize(
	    'cfg_file', ['textdet/drrg/'
	                 'drrg_r50_fpn_unet_1200e_ctw1500.py'])
	def test_drrg(cfg_file):
	    """Run train forward, bbox_head inference and show_result for DRRG."""
	    model_cfg = _get_detector_cfg(cfg_file)
	    model_cfg['pretrained'] = None

	    from mmocr.models import build_detector
	    detector = revert_sync_batchnorm(build_detector(model_cfg))

	    demo = _demo_mm_inputs(1, (1, 3, 224, 224))
	    imgs = demo.pop('imgs')
	    img_metas = demo.pop('img_metas')
	    gt_text_mask = demo.pop('gt_text_mask')
	    gt_center_region_mask = demo.pop('gt_center_region_mask')
	    gt_mask = demo.pop('gt_mask')
	    # The demo radius map is used as the top height map; the bottom
	    # height map is a shallow copy of that list.
	    gt_top_height_map = demo.pop('gt_radius_map')
	    gt_bot_height_map = gt_top_height_map.copy()
	    gt_sin_map = demo.pop('gt_sin_map')
	    gt_cos_map = demo.pop('gt_cos_map')

	    # Random component attributes, one row per RoI with columns
	    # [num_rois, x, y, h, w, cos, sin, label].
	    num_rois = 32
	    column = (num_rois, 1)
	    xs = np.random.randint(4, 224, column)
	    ys = np.random.randint(4, 224, column)
	    heights = 4 * np.ones(column)
	    widths = 4 * np.ones(column)
	    angles = (np.random.random_sample(column) * 2 - 1) * np.pi / 2
	    cos_vals, sin_vals = np.cos(angles), np.sin(angles)
	    comp_labels = np.random.randint(1, 3, column)
	    roi_counts = num_rois * np.ones(column)
	    comp_attribs = np.hstack([
	        roi_counts, xs, ys, heights, widths, cos_vals, sin_vals, comp_labels
	    ])
	    gt_comp_attribs = np.expand_dims(comp_attribs.astype(np.float32), axis=0)

	    # Training-mode forward: the result must be a dict.
	    losses = detector.forward(
	        imgs,
	        img_metas,
	        gt_text_mask=gt_text_mask,
	        gt_center_region_mask=gt_center_region_mask,
	        gt_mask=gt_mask,
	        gt_top_height_map=gt_top_height_map,
	        gt_bot_height_map=gt_bot_height_map,
	        gt_sin_map=gt_sin_map,
	        gt_cos_map=gt_cos_map,
	        gt_comp_attribs=gt_comp_attribs)
	    assert isinstance(losses, dict)

	    # Rebuild the detector with a 6-channel bbox head and 0.8 thresholds
	    # for the inference check.
	    for key, value in (('in_channels', 6), ('text_region_thr', 0.8),
	                       ('center_region_thr', 0.8)):
	        model_cfg['bbox_head'][key] = value
	    detector = build_detector(model_cfg)

	    # Hand-built 6-channel map: channels 0 and 1 are -10 everywhere
	    # except one rectangle each set to 10; channels 2-5 get constant
	    # values inside the channel-1 rectangle.
	    maps = torch.zeros((1, 6, 224, 224), dtype=torch.float)
	    maps[:, 0:2, :, :] = -10.
	    maps[:, 0, 60:100, 50:170] = 10.
	    for channel, value in ((1, 10.), (2, 0.), (3, 1.), (4, 10.),
	                           (5, 10.)):
	        maps[:, channel, 75:85, 60:160] = value

	    with torch.no_grad():
	        # Identity 6x6 weight with 1x1 spatial size and a zero bias;
	        # presumably this makes out_conv pass `maps` through unchanged.
	        full_pass_weight = torch.eye(6).reshape(6, 6, 1, 1)
	        detector.bbox_head.out_conv.weight.data = full_pass_weight
	        detector.bbox_head.out_conv.bias.data.fill_(0.)
	        outs = detector.bbox_head.single_test(maps)
	        boundaries = detector.bbox_head.get_boundary(*outs, img_metas, True)
	    assert len(boundaries) == 1

	    # show_result on a random 5x5 image and one hand-written boundary.
	    dummy_results = {'boundary_result': [[0, 0, 1, 0, 1, 1, 0, 1, 0.9]]}
	    dummy_img = np.random.rand(5, 5)
	    detector.show_result(dummy_img, dummy_results)