Skip to content
This repository has been archived by the owner on Jul 2, 2021. It is now read-only.

Commit

Permalink
Merge pull request #256 from chainer/reverse-coords-order
Browse files Browse the repository at this point in the history
Merge reverse-coords-order
  • Loading branch information
Hakuyume authored Jun 9, 2017
2 parents ca74f08 + 64526c2 commit 72297d2
Show file tree
Hide file tree
Showing 62 changed files with 392 additions and 352 deletions.
15 changes: 7 additions & 8 deletions chainercv/datasets/cub/cub_keypoint_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class CUBKeypointDataset(CUBDatasetBase):
Note that :math:`K=15` in CUB dataset. Also note that not all fifteen
keypoints are visible in an image. When a keypoint is not visible,
the values stored for that keypoint are undefined. The second axis
corresponds to the :math:`x` and :math:`y` coordinates of the
corresponds to the :math:`y` and :math:`x` coordinates of the
keypoints in the image.
A keypoint mask array indicates whether a keypoint is visible in the
Expand Down Expand Up @@ -74,7 +74,8 @@ def __init__(self, data_dir='auto', crop_bbox=True,
if id_ not in self.kp_mask_dict:
self.kp_mask_dict[id_] = []

keypoint = [float(v) for v in values[2:4]]
# (y, x) order
keypoint = [float(v) for v in values[2:4][::-1]]
kp_mask = bool(int(values[4]))

self.kp_dict[id_].append(keypoint)
Expand All @@ -92,9 +93,9 @@ def get_example(self, i):
kp_mask = np.array(self.kp_mask_dict[i], dtype=np.bool)

if self.crop_bbox:
bbox = self.bboxes[i] # (x, y, width, height)
img =\
img[:, bbox[1]: bbox[1] + bbox[3], bbox[0]: bbox[0] + bbox[2]]
# (y_min, x_min, y_max, x_max)
bbox = self.bboxes[i].astype(np.int32)
img = img[:, bbox[0]: bbox[2], bbox[1]: bbox[3]]
keypoint[:, :2] = keypoint[:, :2] - np.array([bbox[0], bbox[1]])

if not self.return_mask:
Expand All @@ -105,8 +106,6 @@ def get_example(self, i):
dtype=np.uint8,
color=False)
if self.crop_bbox:
mask = mask[:,
bbox[1]: bbox[1] + bbox[3],
bbox[0]: bbox[0] + bbox[2]]
mask = mask[:, bbox[0]: bbox[2], bbox[1]: bbox[3]]

return img, keypoint, kp_mask, mask
8 changes: 4 additions & 4 deletions chainercv/datasets/cub/cub_label_dataset.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import os

from chainercv.datasets.cub.cub_utils import CUBDatasetBase
Expand Down Expand Up @@ -50,9 +51,8 @@ def get_example(self, i):
os.path.join(self.data_dir, 'images', self.fns[i]), color=True)

if self.crop_bbox:
bbox = self.bboxes[i] # (x, y, width, height)
img = img[:,
bbox[1]: bbox[1] + bbox[3],
bbox[0]: bbox[0] + bbox[2]]
# (y_min, x_min, y_max, x_max)
bbox = self.bboxes[i].astype(np.int32)
img = img[:, bbox[0]: bbox[2], bbox[1]: bbox[3]]
label = self._data_labels[i]
return img, label
10 changes: 8 additions & 2 deletions chainercv/datasets/cub/cub_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import os

import chainer
Expand Down Expand Up @@ -58,8 +59,13 @@ def __init__(self, data_dir='auto', mask_dir='auto', crop_bbox=True):
bboxes_file = os.path.join(data_dir, 'bounding_boxes.txt')

self.fns = [fn.strip().split()[1] for fn in open(images_file)]
bboxes = [bbox.split()[1:] for bbox in open(bboxes_file)]
self.bboxes = [[int(float(elem)) for elem in bbox] for bbox in bboxes]
y_min = np.array([float(bb.split()[2]) for bb in open(bboxes_file)])
x_min = np.array([float(bb.split()[1]) for bb in open(bboxes_file)])
height = np.array([float(bb.split()[4]) for bb in open(bboxes_file)])
width = np.array([float(bb.split()[3]) for bb in open(bboxes_file)])
self.bboxes = np.stack(
(y_min, x_min, y_min + height, x_min + width),
axis=1).astype(np.float32)

self.crop_bbox = crop_bbox

Expand Down
4 changes: 2 additions & 2 deletions chainercv/datasets/voc/voc_detection_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ class VOCDetectionDataset(chainer.dataset.DatasetMixin):
The bounding boxes are packed into a two dimensional tensor of shape
:math:`(R, 4)`, where :math:`R` is the number of bounding boxes in
the image. The second axis represents attributes of the bounding box.
They are :obj:`(x_min, y_min, x_max, y_max)`, where the
They are :obj:`(y_min, x_min, y_max, x_max)`, where the
four attributes are coordinates of the top left and the bottom right
vertices.
Expand Down Expand Up @@ -119,7 +119,7 @@ def get_example(self, i):
# subtract 1 to make pixel indexes 0-based
bbox.append([
int(bndbox_anno.find(tag).text) - 1
for tag in ('xmin', 'ymin', 'xmax', 'ymax')])
for tag in ('ymin', 'xmin', 'ymax', 'xmax')])
name = obj.find('name').text.lower().strip()
label.append(voc_utils.voc_detection_label_names.index(name))
bbox = np.stack(bbox).astype(np.float32)
Expand Down
4 changes: 2 additions & 2 deletions chainercv/evaluations/eval_detection_voc_ap.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def eval_detection_voc_ap(
of bounding boxes. This is an array whose shape is :math:`(R, 4)`,
where :math:`R` corresponds
to the number of bounding boxes, which may vary among images.
The second axis corresponds to :obj:`x_min, y_min, x_max, y_max`
The second axis corresponds to :obj:`y_min, x_min, y_max, x_max`
of a bounding box.
pred_labels (iterable of numpy.ndarray): An iterable of labels.
Similar to :obj:`pred_bboxes`, its index corresponds to an
Expand Down Expand Up @@ -94,7 +94,7 @@ def calc_detection_voc_prec_rec(
of bounding boxes. This is an array whose shape is :math:`(R, 4)`,
where :math:`R` corresponds
to the number of bounding boxes, which may vary among images.
The second axis corresponds to :obj:`x_min, y_min, x_max, y_max`
The second axis corresponds to :obj:`y_min, x_min, y_max, x_max`
of a bounding box.
pred_labels (iterable of numpy.ndarray): An iterable of labels.
Similar to :obj:`pred_bboxes`, its index corresponds to an
Expand Down
6 changes: 3 additions & 3 deletions chainercv/evaluations/eval_pck.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@ def eval_pck(pred, expected, alpha, L):
:math:`0 < \\alpha < 1` is a variable we control.
:math:`L` is determined differently depending on the context. For example,
in evaluation of keypoint matching for CUB dataset,
:math:`L=\\sqrt{w^2 + h^2}` is used.
:math:`L=\\sqrt{h^2 + w^2}` is used.
Args:
pred (~numpy.ndarray): An array of shape :math:`(K, 2)`, where
:math:`K` is the number of keypoints to be evaluated. The
two elements of the second axis corresponds to :math:`x`
and :math:`y` coordinate of the keypoint.
two elements of the second axis corresponds to :math:`y`
and :math:`x` coordinate of the keypoint.
expected (~numpy.ndarray): Same kind of array as :obj:`pred`.
This contains ground truth location of the keypoints that
the user tries to predict.
Expand Down
2 changes: 1 addition & 1 deletion chainercv/extensions/detection/detection_vis_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class DetectionVisReport(chainer.training.extension.Extension):
:obj:`gt_bbox` and :obj:`pred_bbox` are float arrays
of shape :math:`(R, 4)`, where :math:`R` is the number of
bounding boxes in the image. Each bounding box is organized
by :obj:`(x_min, y_min, x_max, y_max)` in the second axis.
by :obj:`(y_min, x_min, y_max, x_max)` in the second axis.
:obj:`gt_label` and :obj:`pred_label` are integer arrays
of shape :math:`(R,)`. Each label indicates the class of
Expand Down
10 changes: 5 additions & 5 deletions chainercv/links/model/faster_rcnn/faster_rcnn.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def __call__(self, x, scale=1., test=True):
:math:`(R',)`.
"""
img_size = x.shape[2:][::-1]
img_size = x.shape[2:]

h = self.extractor(x, test=test)
rpn_locs, rpn_scores, rois, roi_indices, anchor =\
Expand Down Expand Up @@ -214,7 +214,7 @@ def prepare(self, img):
if scale * max(H, W) > self.max_size:
scale = self.max_size / max(H, W)

img = resize(img, (int(W * scale), int(H * scale)))
img = resize(img, (int(H * scale), int(W * scale)))

img = (img - self.mean).astype(np.float32, copy=False)
return img
Expand Down Expand Up @@ -259,7 +259,7 @@ def predict(self, imgs):
* **bboxes**: A list of float arrays of shape :math:`(R, 4)`, \
where :math:`R` is the number of bounding boxes in a image. \
Each bounding box is organized by \
:obj:`(x_min, y_min, x_max, y_max)` \
:obj:`(y_min, x_min, y_max, x_max)` \
in the second axis.
* **labels** : A list of integer arrays of shape :math:`(R,)`. \
Each value indicates the class of the bounding box. \
Expand Down Expand Up @@ -305,9 +305,9 @@ def predict(self, imgs):
cls_bbox = cls_bbox.reshape(-1, self.n_class * 4)
# clip bounding box
cls_bbox[:, slice(0, 4, 2)] = self.xp.clip(
cls_bbox[:, slice(0, 4, 2)], 0, W / scale)
cls_bbox[:, slice(0, 4, 2)], 0, H / scale)
cls_bbox[:, slice(1, 4, 2)] = self.xp.clip(
cls_bbox[:, slice(1, 4, 2)], 0, H / scale)
cls_bbox[:, slice(1, 4, 2)], 0, W / scale)

prob = F.softmax(roi_score).data

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def __call__(self, imgs, bboxes, labels, scale):
raise ValueError('Currently only batch size 1 is supported.')

_, _, H, W = imgs.shape
img_size = (W, H)
img_size = (H, W)

features = self.faster_rcnn.extractor(imgs, test=not self.train)
rpn_locs, rpn_scores, rois, roi_indices, anchor = self.faster_rcnn.rpn(
Expand Down
16 changes: 12 additions & 4 deletions chainercv/links/model/faster_rcnn/faster_rcnn_vgg.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ class FasterRCNNVGG16(FasterRCNN):
'voc07': {
'n_fg_class': 20,
'url': 'https://github.com/yuyu2172/share-weights/releases/'
'download/0.0.2/faster_rcnn_vgg16_voc07_2017_05_24.npz'
'download/0.0.3/faster_rcnn_vgg16_voc07_2017_06_06.npz'
}
}
feat_stride = 16
Expand Down Expand Up @@ -227,10 +227,11 @@ def __call__(self, x, rois, roi_indices, test=True):
"""
roi_indices = roi_indices.astype(np.float32)
rois = self.xp.concatenate(
indices_and_rois = self.xp.concatenate(
(roi_indices[:, None], rois), axis=1)
pool = F.roi_pooling_2d(
x, rois, self.roi_size, self.roi_size, self.spatial_scale)
pool = _roi_pooling_2d_yx(
x, indices_and_rois, self.roi_size, self.roi_size,
self.spatial_scale)

fc6 = _relu(self.fc6(pool))
fc7 = _relu(self.fc7(fc6))
Expand Down Expand Up @@ -291,5 +292,12 @@ def __call__(self, x, test=True):
return h


def _roi_pooling_2d_yx(x, indices_and_rois, outh, outw, spatial_scale):
    """Apply RoI pooling to RoIs whose coordinates are in (y, x) order.

    :func:`chainer.functions.roi_pooling_2d` expects each RoI row to be
    :obj:`(batch_index, x_min, y_min, x_max, y_max)`. This wrapper accepts
    rows in :obj:`(batch_index, y_min, x_min, y_max, x_max)` order and
    swaps the coordinate columns before delegating.

    Args:
        x: Feature map passed through to :obj:`F.roi_pooling_2d`.
        indices_and_rois (array): An array of shape :math:`(R, 5)`.
            Column 0 is the batch index of each RoI and columns 1-4 are
            :obj:`(y_min, x_min, y_max, x_max)`.
        outh (int): Height of the pooled output.
        outw (int): Width of the pooled output.
        spatial_scale (float): Scale applied to the RoI coordinates.

    Returns:
        The output of :obj:`F.roi_pooling_2d`.

    """
    # Keep column 0 (the batch index) in place and swap only the y/x pairs.
    # Leaving it out would hand an (R, 4) array to a function that requires
    # (R, 5) rows.
    xy_indices_and_rois = indices_and_rois[:, [0, 2, 1, 4, 3]]
    pool = F.roi_pooling_2d(
        x, xy_indices_and_rois, outh, outw, spatial_scale)
    return pool


def _max_pooling_2d(x):
    """Apply :obj:`F.max_pooling_2d` to :obj:`x` with a kernel size of 2."""
    pooled = F.max_pooling_2d(x, ksize=2)
    return pooled
14 changes: 7 additions & 7 deletions chainercv/links/model/faster_rcnn/region_proposal_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def __call__(self, x, img_size, scale=1., test=True):
Args:
x (~chainer.Variable): The Features extracted from images.
Its shape is :math:`(N, C, H, W)`.
img_size (tuple of ints): A tuple :obj:`width, height`,
img_size (tuple of ints): A tuple :obj:`height, width`,
which contains image size after scaling.
scale (float): The amount of scaling done to the input images after
reading them from files.
Expand Down Expand Up @@ -110,8 +110,8 @@ def __call__(self, x, img_size, scale=1., test=True):
"""
n, _, hh, ww = x.shape
anchor = _enumerate_shifted_anchor(
self.xp.array(self.anchor_base), self.feat_stride, ww, hh)
n_anchor = anchor.shape[0] // (ww * hh)
self.xp.array(self.anchor_base), self.feat_stride, hh, ww)
n_anchor = anchor.shape[0] // (hh * ww)
h = F.relu(self.conv1(x))

rpn_locs = self.loc(h)
Expand Down Expand Up @@ -139,19 +139,19 @@ def __call__(self, x, img_size, scale=1., test=True):
return rpn_locs, rpn_scores, rois, roi_indices, anchor


def _enumerate_shifted_anchor(anchor_base, feat_stride, width, height):
def _enumerate_shifted_anchor(anchor_base, feat_stride, height, width):
# Enumerate all shifted anchors:
#
# add A anchors (1, A, 4) to
# cell K shifts (K, 1, 4) to get
# shift anchors (K, A, 4)
# reshape to (K*A, 4) shifted anchors
xp = cuda.get_array_module(anchor_base)
shift_x = xp.arange(0, width * feat_stride, feat_stride)
shift_y = xp.arange(0, height * feat_stride, feat_stride)
shift_x = xp.arange(0, width * feat_stride, feat_stride)
shift_x, shift_y = xp.meshgrid(shift_x, shift_y)
shift = xp.stack((shift_x.ravel(), shift_y.ravel(),
shift_x.ravel(), shift_y.ravel()), axis=1)
shift = xp.stack((shift_y.ravel(), shift_x.ravel(),
shift_y.ravel(), shift_x.ravel()), axis=1)

A = anchor_base.shape[0]
K = shift.shape[0]
Expand Down
12 changes: 6 additions & 6 deletions chainercv/links/model/faster_rcnn/utils/anchor_target_creator.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def __call__(self, bbox, anchor, img_size):
:math:`(R, 4)`.
anchor (array): Coordinates of anchors. Its shape is
:math:`(S, 4)`.
img_size (tuple of ints): A tuple :obj:`W, H`, which
img_size (tuple of ints): A tuple :obj:`H, W`, which
is a tuple of height and width of an image.
Returns:
Expand All @@ -74,10 +74,10 @@ def __call__(self, bbox, anchor, img_size):
bbox = cuda.to_cpu(bbox)
anchor = cuda.to_cpu(anchor)

img_W, img_H = img_size
img_H, img_W = img_size

n_anchor = len(anchor)
inside_index = _get_inside_index(anchor, img_W, img_H)
inside_index = _get_inside_index(anchor, img_H, img_W)
anchor = anchor[inside_index]
argmax_ious, label = self._create_label(
inside_index, anchor, bbox)
Expand Down Expand Up @@ -156,15 +156,15 @@ def _unmap(data, count, index, fill=0):
return ret


def _get_inside_index(anchor, W, H):
def _get_inside_index(anchor, H, W):
# Calc indices of anchors which are located completely inside of the image
# whose size is specified.
xp = cuda.get_array_module(anchor)

index_inside = xp.where(
(anchor[:, 0] >= 0) &
(anchor[:, 1] >= 0) &
(anchor[:, 2] <= W) & # width
(anchor[:, 3] <= H) # height
(anchor[:, 2] <= H) &
(anchor[:, 3] <= W)
)[0]
return index_inside
41 changes: 21 additions & 20 deletions chainercv/links/model/faster_rcnn/utils/bbox2loc.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,16 @@ def bbox2loc(src_bbox, dst_bbox):
Given bounding boxes, this function computes offsets and scales
to match the source bounding boxes to the target bounding boxes.
Mathematcially, given a bounding box whose center is :math:`p_x, p_y` and
size :math:`p_w, p_h` and the target bounding box whose center is
:math:`g_x, g_y` and size :math:`g_w, g_h`, the offsets and scales
:math:`t_x, t_y, t_w, t_h` can be computed by the following formulas.
Mathematically, given a bounding box whose center is
:math:`(y, x) = p_y, p_x` and
size :math:`p_h, p_w` and the target bounding box whose center is
:math:`g_y, g_x` and size :math:`g_h, g_w`, the offsets and scales
:math:`t_y, t_x, t_h, t_w` can be computed by the following formulas.
* :math:`t_x = \\frac{(g_x - p_x)} {p_w}`
* :math:`t_y = \\frac{(g_y - p_y)} {p_h}`
* :math:`t_w = \\log(\\frac{g_w} {p_w})`
* :math:`t_x = \\frac{(g_x - p_x)} {p_w}`
* :math:`t_h = \\log(\\frac{g_h} {p_h})`
* :math:`t_w = \\log(\\frac{g_w} {p_w})`
The output is same type as the type of the inputs.
The encoding formulas are used in works such as R-CNN [#]_.
Expand All @@ -26,35 +27,35 @@ def bbox2loc(src_bbox, dst_bbox):
Args:
src_bbox (array): An image coordinate array whose shape is
:math:`(R, 4)`. :math:`R` is the number of bounding boxes.
These coordinates are used to compute :math:`p_x, p_y, p_w, p_h`.
These coordinates are used to compute :math:`p_y, p_x, p_h, p_w`.
dst_bbox (array): An image coordinate array whose shape is
:math:`(R, 4)`.
These coordinates are used to compute :math:`g_x, g_y, g_w, g_h`.
These coordinates are used to compute :math:`g_y, g_x, g_h, g_w`.
Returns:
array:
Bounding box offsets and scales from :obj:`src_bbox` \
to :obj:`dst_bbox`. \
This has shape :math:`(R, 4)`.
The second axis contains four values :math:`t_x, t_y, t_w, t_h`.
The second axis contains four values :math:`t_y, t_x, t_h, t_w`.
"""
xp = cuda.get_array_module(src_bbox)

width = src_bbox[:, 2] - src_bbox[:, 0]
height = src_bbox[:, 3] - src_bbox[:, 1]
ctr_x = src_bbox[:, 0] + 0.5 * width
ctr_y = src_bbox[:, 1] + 0.5 * height
height = src_bbox[:, 2] - src_bbox[:, 0]
width = src_bbox[:, 3] - src_bbox[:, 1]
ctr_y = src_bbox[:, 0] + 0.5 * height
ctr_x = src_bbox[:, 1] + 0.5 * width

base_width = dst_bbox[:, 2] - dst_bbox[:, 0]
base_height = dst_bbox[:, 3] - dst_bbox[:, 1]
base_ctr_x = dst_bbox[:, 0] + 0.5 * base_width
base_ctr_y = dst_bbox[:, 1] + 0.5 * base_height
base_height = dst_bbox[:, 2] - dst_bbox[:, 0]
base_width = dst_bbox[:, 3] - dst_bbox[:, 1]
base_ctr_y = dst_bbox[:, 0] + 0.5 * base_height
base_ctr_x = dst_bbox[:, 1] + 0.5 * base_width

dx = (base_ctr_x - ctr_x) / width
dy = (base_ctr_y - ctr_y) / height
dw = xp.log(base_width / width)
dx = (base_ctr_x - ctr_x) / width
dh = xp.log(base_height / height)
dw = xp.log(base_width / width)

loc = xp.vstack((dx, dy, dw, dh)).transpose()
loc = xp.vstack((dy, dx, dh, dw)).transpose()
return loc
Loading

0 comments on commit 72297d2

Please sign in to comment.