# Source code for gluoncv.model_zoo.coders

# pylint: disable=arguments-differ, missing-docstring
"""Encoder and Decoder functions.
Encoders are used during training to assign training targets.
Decoders are used during testing/validation to convert predictions back to
normal boxes, etc.
"""
from __future__ import absolute_import
from mxnet import nd
from mxnet import gluon
from .bbox import BBoxCornerToCenter


class NormalizedBoxCenterEncoder(gluon.Block):
    """Turn matched ground-truth boxes into normalized center-offset targets.

    Ground-truth boxes are expected in corner format:
    `x_{min}, y_{min}, x_{max}, y_{max}`.

    Parameters
    ----------
    stds : array-like of size 4
        Values the four encoded components are divided by,
        default is (0.1, 0.1, 0.2, 0.2).

    """
    def __init__(self, stds=(0.1, 0.1, 0.2, 0.2)):
        super(NormalizedBoxCenterEncoder, self).__init__()
        assert len(stds) == 4, "Box Encoder requires 4 std values."
        self._stds = stds
        with self.name_scope():
            # split=True: the converter output is indexed per component below.
            self.corner_to_center = BBoxCornerToCenter(split=True)

    def forward(self, samples, matches, anchors, refs):
        """Compute regression targets and their mask for every anchor.

        Parameters
        ----------
        samples : NDArray
            Sampling result per anchor; entries greater than 0.5 are treated
            as positives. Presumably shaped (B, N) -- TODO confirm.
        matches : NDArray
            For each anchor, the index of the reference box it is matched to.
            Presumably shaped (B, N) -- TODO confirm.
        anchors : NDArray
            Anchor boxes, fed through the same corner-to-center conversion as
            the reference boxes.
        refs : NDArray
            Reference (ground-truth) boxes with 4 values per box.
            Presumably shaped (B, M, 4) -- TODO confirm.

        Returns
        -------
        targets : NDArray
            Encoded offsets for positive anchors, zeros everywhere else.
        masks : NDArray
            Ones where `targets` is valid (positive anchors), zeros elsewhere.

        """
        num_anchors = matches.shape[1]
        # TODO(zhreshold): batch_pick, take multiple elements?
        # Repeat the reference boxes once per anchor so that a single `pick`
        # can select the matched box for each anchor.
        tiled = nd.repeat(refs.reshape((0, 1, -1, 4)), axis=1, repeats=num_anchors)
        coords = nd.split(tiled, axis=-1, num_outputs=4, squeeze_axis=True)
        picked = []
        for coord in coords:
            picked.append(nd.pick(coord, matches, axis=2).reshape((0, -1, 1)))
        matched = nd.concat(*picked, dim=2)

        # Components 0/1 are used as centers, 2/3 as sizes in the math below.
        gt = self.corner_to_center(matched)
        anc = self.corner_to_center(anchors)
        offset_x = (gt[0] - anc[0]) / anc[2] / self._stds[0]
        offset_y = (gt[1] - anc[1]) / anc[3] / self._stds[1]
        scale_w = nd.log(gt[2] / anc[2]) / self._stds[2]
        scale_h = nd.log(gt[3] / anc[3]) / self._stds[3]
        encoded = nd.concat(offset_x, offset_y, scale_w, scale_h, dim=2)

        # Broadcast the per-anchor sample flag over the 4 box components.
        positive = nd.tile(samples.reshape((0, -1, 1)), reps=(1, 1, 4)) > 0.5
        targets = nd.where(positive, encoded, nd.zeros_like(encoded))
        masks = nd.where(positive, nd.ones_like(positive), nd.zeros_like(positive))
        return targets, masks
class NormalizedBoxCenterDecoder(gluon.HybridBlock):
    """Decode bounding boxes training target with normalized center offsets.

    This decoder must cooperate with NormalizedBoxCenterEncoder of same `stds`
    in order to get properly reconstructed bounding boxes.

    Returned bounding boxes are using corner type:
    `x_{min}, y_{min}, x_{max}, y_{max}`.

    Parameters
    ----------
    stds : array-like of size 4
        Std value the encoded values were divided by,
        default is (0.1, 0.1, 0.2, 0.2).

    """
    def __init__(self, stds=(0.1, 0.1, 0.2, 0.2)):
        super(NormalizedBoxCenterDecoder, self).__init__()
        # The message previously said "Box Encoder", copied from the encoder.
        assert len(stds) == 4, "Box Decoder requires 4 std values."
        self._stds = stds

    def hybrid_forward(self, F, x, anchors):
        """Convert encoded predictions back into corner-format boxes.

        Parameters
        ----------
        F : module
            Backend API namespace supplied by Gluon.
        x : NDArray or Symbol
            Encoded box predictions with 4 values along the last axis.
        anchors : NDArray or Symbol
            Anchor boxes with 4 values along the last axis. The arithmetic
            below uses components 0/1 as centers and 2/3 as width/height.

        Returns
        -------
        NDArray or Symbol
            Decoded boxes as `x_{min}, y_{min}, x_{max}, y_{max}` along the
            last axis.

        """
        a = F.split(anchors, axis=-1, num_outputs=4)
        p = F.split(x, axis=-1, num_outputs=4)
        # Undo the std normalization, then scale by anchor size and shift by
        # the anchor center to recover the predicted box center.
        ox = F.broadcast_add(F.broadcast_mul(p[0] * self._stds[0], a[2]), a[0])
        oy = F.broadcast_add(F.broadcast_mul(p[1] * self._stds[1], a[3]), a[1])
        # Half extents: exp() inverts the log-ratio size encoding.
        ow = F.broadcast_mul(F.exp(p[2] * self._stds[2]), a[2]) / 2
        oh = F.broadcast_mul(F.exp(p[3] * self._stds[3]), a[3]) / 2
        return F.concat(ox - ow, oy - oh, ox + ow, oy + oh, dim=-1)
class MultiClassEncoder(gluon.HybridBlock):
    """Encode classification training target given matching results.

    This encoder will assign training target of matched bounding boxes to
    ground-truth label + 1 and negative samples with label 0.
    Ignored samples will be assigned with `ignore_label`, whose default is -1.

    Parameters
    ----------
    ignore_label : float
        Assigned to un-matched samples, they are neither positive or negative
        during training, and should be excluded in loss function.
        Default is -1.

    """
    def __init__(self, ignore_label=-1):
        super(MultiClassEncoder, self).__init__()
        self._ignore_label = ignore_label

    def hybrid_forward(self, F, samples, matches, refs):
        """Assign a class target to every anchor.

        Parameters
        ----------
        F : module
            Backend API namespace supplied by Gluon.
        samples : NDArray
            Sampling result per anchor: values above 0.5 are positives,
            values below -0.5 are negatives, anything else is ignored.
        matches : NDArray
            For each anchor, the index of the reference it is matched to.
        refs : NDArray
            Ground-truth class ids of the references.

        Returns
        -------
        NDArray
            Per-anchor targets: matched label + 1 for positives, 0 for
            negatives and `ignore_label` for the rest.

        """
        # NOTE(review): `matches.shape` presumably needs NDArray input, so this
        # block may still not be hybridizable as-is -- confirm before relying
        # on `hybridize()`.
        refs = F.repeat(refs.reshape((0, 1, -1)), axis=1, repeats=matches.shape[1])
        # Shift labels by one so that 0 stays reserved for negatives.
        target_ids = F.pick(refs, matches, axis=2) + 1
        # Use the backend `F` rather than hard-coding `nd`, so every operator
        # in this method comes from the same API namespace.
        targets = F.where(samples > 0.5, target_ids,
                          F.ones_like(target_ids) * self._ignore_label)
        targets = F.where(samples < -0.5, F.zeros_like(targets), targets)
        return targets
class MultiClassDecoder(gluon.HybridBlock):
    """Decode classification results.

    This decoder must work with `MultiClassEncoder` to reconstruct valid labels.
    The decoder expect results are after logits, e.g. Softmax.

    Parameters
    ----------
    axis : int
        Axis of class-wise results.
    thresh : float
        Confidence threshold for the post-softmax scores.
        Scores less than `thresh` are marked with `0`, corresponding `cls_id`
        is marked with invalid class id `-1`.

    """
    def __init__(self, axis=-1, thresh=0.01):
        super(MultiClassDecoder, self).__init__()
        self._axis = axis
        self._thresh = thresh

    def hybrid_forward(self, F, x):
        """Pick the best foreground class and its score for each position.

        Parameters
        ----------
        F : module
            Backend API namespace supplied by Gluon.
        x : NDArray or Symbol
            Class scores; index 0 along `axis` is skipped (background).

        Returns
        -------
        cls_id : NDArray or Symbol
            Index of the best-scoring class among the remaining (foreground)
            classes, or -1 where the score does not exceed `thresh`.
        scores : NDArray or Symbol
            Score of the selected class, or 0 where it does not exceed
            `thresh`.

        """
        # Drop the first entry along the class axis (background).
        pos_x = x.slice_axis(axis=self._axis, begin=1, end=None)
        cls_id = F.argmax(pos_x, self._axis)
        # Pick along the same axis argmax reduced over; a hard-coded -1 here
        # only matched when the class axis happened to be the last one.
        scores = F.pick(pos_x, cls_id, axis=self._axis)
        mask = scores > self._thresh
        cls_id = F.where(mask, cls_id, F.ones_like(cls_id) * -1)
        scores = F.where(mask, scores, F.zeros_like(scores))
        return cls_id, scores
class MultiPerClassDecoder(gluon.HybridBlock):
    """Decode classification results.

    This decoder must work with `MultiClassEncoder` to reconstruct valid labels.
    The decoder expect results are after logits, e.g. Softmax.
    This version is different from
    :py:class:`gluoncv.model_zoo.coders.MultiClassDecoder` with the following
    changes:

    For each position(anchor boxes), each foreground class can have their own
    results, rather than enforced to be the best one.
    For example, for a 5-class prediction with background (6 classes in total),
    say (0.5, 0.1, 0.2, 0.1, 0.05, 0.05) as (bg, apple, orange, peach, grape, melon),
    `MultiClassDecoder` produce only one class id and score, that is (orange-0.2).
    `MultiPerClassDecoder` produce 5 results individually:
    (apple-0.1, orange-0.2, peach-0.1, grape-0.05, melon-0.05).

    Parameters
    ----------
    num_class : int
        Number of classes including background.
    axis : int
        Axis of class-wise results.
    thresh : float
        Confidence threshold for the post-softmax scores.
        Scores less than `thresh` are marked with `0`, corresponding `cls_id`
        is marked with invalid class id `-1`.

    """
    def __init__(self, num_class, axis=-1, thresh=0.01):
        super(MultiPerClassDecoder, self).__init__()
        # Background takes one slot, the rest are foreground classes.
        self._fg_class = num_class - 1
        self._axis = axis
        self._thresh = thresh

    def hybrid_forward(self, F, x):
        """Return a class id and score for every foreground class.

        Parameters
        ----------
        F : module
            Backend API namespace supplied by Gluon.
        x : NDArray or Symbol
            Class scores; index 0 along `axis` is skipped (background).

        Returns
        -------
        cls_id : NDArray or Symbol
            Foreground class indices (0 .. num_class - 2) laid out along
            `axis`, replaced by -1 where the score does not exceed `thresh`.
        scores : NDArray or Symbol
            Foreground scores, replaced by 0 where they do not exceed
            `thresh`.

        """
        # Drop the first entry along the class axis (background).
        scores = x.slice_axis(axis=self._axis, begin=1, end=None)
        # A single zero-filled slice along the class axis; adding `i` to it
        # yields the id plane for foreground class `i`. The slice and the
        # concat use `self._axis` so the ids line up with `scores` even when
        # the class axis is not the last one.
        template = F.zeros_like(x.slice_axis(axis=self._axis, begin=0, end=1))
        cls_ids = [template + i for i in range(self._fg_class)]
        cls_id = F.concat(*cls_ids, dim=self._axis)
        mask = scores > self._thresh
        cls_id = F.where(mask, cls_id, F.ones_like(cls_id) * -1)
        scores = F.where(mask, scores, F.zeros_like(scores))
        return cls_id, scores