// deform_conv_op_impl.h is the templated implementation of the
// deform_conv_op.h file.
|
#ifndef CAFFE2_OPERATORS_DEFORM_CONV_OP_IMPL_H_
|
#define CAFFE2_OPERATORS_DEFORM_CONV_OP_IMPL_H_
|
|
#include "caffe2/core/context.h"
|
#include "caffe2/core/flags.h"
|
#include "caffe2/core/logging.h"
|
#include "caffe2/core/operator.h"
|
#include "caffe2/operators/conv_pool_op_base.h"
|
#include "caffe2/operators/deform_conv_op.h"
|
#include "caffe2/utils/math.h"
|
|
namespace caffe2 {
|
|
template <typename T, class Context>
|
bool DeformConvOp<T, Context>::RunOnDeviceWithOrderNCHW() {
|
const Tensor& X = Input(INPUT);
|
const Tensor& offset = Input(OFFSET);
|
auto& filter = Input(FILTER);
|
Tensor* Y = Output(0);
|
const int N = X.dim32(0), C = X.dim32(1);
|
CAFFE_ENFORCE_EQ(X.dim(), filter.dim());
|
const int M = filter.dim32(0);
|
CAFFE_ENFORCE(
|
C == filter.dim32(1) * group_,
|
"Convolution op: input channels does not match: # of input channels ",
|
C,
|
" is not equal to kernel channels * group:",
|
filter.dim32(1),
|
"*",
|
group_);
|
CAFFE_ENFORCE(
|
M % group_ == 0,
|
"The number of output channels is not divisible by group.");
|
CAFFE_ENFORCE(
|
kernel_.size() == 2,
|
"Deformable convolution only supports 2d kernel, has ",
|
kernel_.size(),
|
"d kernel.");
|
CAFFE_ENFORCE(
|
offset.dim() == 4,
|
"Deformable convolution only supports 4d offset, has ",
|
offset.dim(),
|
"d offset.");
|
CAFFE_ENFORCE_EQ(offset.dim32(0), N);
|
CAFFE_ENFORCE(
|
C % deformable_group_ == 0,
|
"The number of input channels ",
|
C,
|
" is not divisible by deformable group ",
|
deformable_group_);
|
CAFFE_ENFORCE(
|
M % deformable_group_ == 0,
|
"The number of output channels ",
|
M,
|
" is not divisible by deformable group ",
|
deformable_group_);
|
CAFFE_ENFORCE(
|
offset.dim32(1) == 2 * kernel_h() * kernel_w() * deformable_group_,
|
"Deformable convolution: offset 1st dimension must equal "
|
"2 * kernel_h * kernel_w * deformable_group: 2 * ",
|
kernel_h(),
|
" * ",
|
kernel_w(),
|
" * ",
|
deformable_group_);
|
|
CAFFE_ENFORCE_EQ(
|
offset.dim32(2),
|
(X.dim32(2) + pad_t() + pad_b() - (dilation_h() * (kernel_h() - 1) + 1)) /
|
stride_h() +
|
1);
|
CAFFE_ENFORCE_EQ(
|
offset.dim32(3),
|
(X.dim32(3) + pad_l() + pad_r() - (dilation_w() * (kernel_w() - 1) + 1)) /
|
stride_w() +
|
1);
|
|
int kernel_dims_size = 1;
|
for (int i = 0; i < kernel_.size(); ++i) {
|
CAFFE_ENFORCE(filter.dim32(i + 2) == kernel_[i]);
|
kernel_dims_size *= kernel_[i];
|
}
|
|
ConvPoolOpBase<Context>::SetOutputSize(X, Y, filter.dim32(0));
|
|
const vector<int> input_dims = GetDims(X);
|
const vector<int> output_dims = GetDims(*Y);
|
const int input_image_size = this->GetDimsSize(X);
|
const int output_image_size = this->GetDimsSize(*Y);
|
|
vector<int> img_shape;
|
img_shape.assign(X.sizes().begin() + 1, X.sizes().end());
|
|
vector<int> buffer_shape;
|
buffer_shape.push_back(C / group_ * kernel_dims_size);
|
buffer_shape.insert(
|
buffer_shape.end(), output_dims.begin(), output_dims.end());
|
|
// The dimension of each kernel
|
const int kernel_dim = C / group_ * kernel_dims_size;
|
// The offset corresponding to a single input image, and a single output
|
// image.
|
const int input_offset = C / group_ * input_image_size;
|
const int output_offset = M / group_ * output_image_size;
|
const int offset_offset = offset.numel() / offset.dim32(0);
|
const int filter_offset = filter.numel() / group_;
|
|
// The col buffer is stored in CHW order as well - kernel_dim, and the height
|
// and width.
|
const T* Xdata = X.template data<T>();
|
const T* offset_data = offset.template data<T>();
|
|
if (InputSize() == 4) {
|
auto& bias = Input(BIAS);
|
CAFFE_ENFORCE(bias.dim() == 1);
|
CAFFE_ENFORCE(bias.dim32(0) == M);
|
if (bias_multiplier_.numel() != output_image_size) {
|
// If the helper bias multiplier is not image size, reshape and fill it
|
// with
|
// one.
|
ReinitializeTensor(
|
&bias_multiplier_,
|
vector<int64_t>(1, output_image_size),
|
at::dtype<T>().device(Context::GetDeviceType()));
|
math::Set<T, Context>(
|
output_image_size,
|
static_cast<T>(1),
|
bias_multiplier_.template mutable_data<T>(),
|
&context_);
|
}
|
}
|
T* Ydata = Y->template mutable_data<T>();
|
const T* bias_data = nullptr;
|
if (InputSize() == 4) {
|
bias_data = Input(BIAS).template data<T>();
|
}
|
|
auto f = [&](Tensor* col_buffer) {
|
col_buffer->Resize(buffer_shape);
|
T* col_buffer_data = col_buffer->template mutable_data<T>();
|
// Im2col, followed by gemm.
|
for (int image_id = 0; image_id < N; ++image_id) {
|
for (int group_id = 0; group_id < group_; ++group_id) {
|
DeformableIm2col(
|
Xdata + group_id * input_offset,
|
offset_data,
|
X.sizes(),
|
col_buffer->sizes(),
|
col_buffer_data);
|
// Weight term
|
math::Gemm<T, Context>(
|
CblasNoTrans,
|
CblasNoTrans,
|
M / group_,
|
output_image_size,
|
kernel_dim,
|
1,
|
filter.template data<T>() + group_id * filter_offset,
|
col_buffer_data,
|
0,
|
Ydata + group_id * output_offset,
|
&context_);
|
}
|
if (bias_data) {
|
math::Gemm<T, Context>(
|
CblasNoTrans,
|
CblasNoTrans,
|
M,
|
output_image_size,
|
1,
|
1,
|
bias_data,
|
bias_multiplier_.template data<T>(),
|
1,
|
Ydata,
|
&context_);
|
}
|
Xdata += input_offset * group_;
|
Ydata += output_offset * group_;
|
offset_data += offset_offset;
|
}
|
};
|
|
if (FLAGS_caffe2_force_shared_col_buffer || shared_buffer_) {
|
runWithSharedBuffer<Context>(ws_, f);
|
} else {
|
f(&col_buffer_);
|
}
|
return true;
|
}
|
|
template <typename T, class Context>
|
bool DeformConvGradientOp<T, Context>::RunOnDeviceWithOrderNCHW() {
|
auto& X = Input(INPUT);
|
auto& offset = Input(OFFSET);
|
auto& filter = Input(FILTER);
|
auto& dY = Input(OUTPUT_GRAD);
|
|
|
const int N = X.dim32(0), C = X.dim32(1);
|
|
const vector<int> input_dims = this->GetDims(X);
|
const int input_image_size = this->GetDimsSize(X);
|
|
const vector<int> output_dims = this->GetDims(dY);
|
// The output image size is the spatial size of the output.
|
const int output_image_size = this->GetDimsSize(dY);
|
|
ConvPoolOpBase<Context>::ComputePads(input_dims);
|
CAFFE_ENFORCE_EQ(X.dim(), filter.dim());
|
const int M = filter.dim32(0);
|
CAFFE_ENFORCE(filter.dim32(1) * group_ == C);
|
|
CAFFE_ENFORCE(
|
kernel_.size() == 2,
|
"Deformable convolution only supports 2d kernel, has ",
|
kernel_.size(),
|
"d kernel.");
|
CAFFE_ENFORCE(
|
offset.dim() == 4,
|
"Deformable convolution only supports 4d offset, has ",
|
offset.dim(),
|
"d offset.");
|
CAFFE_ENFORCE_EQ(offset.dim32(0), N);
|
CAFFE_ENFORCE(
|
C % deformable_group_ == 0,
|
"The number of input channels ",
|
C,
|
" is not divisible by deformable group ",
|
deformable_group_);
|
CAFFE_ENFORCE(
|
M % deformable_group_ == 0,
|
"The number of output channels ",
|
M,
|
" is not divisible by deformable group ",
|
deformable_group_);
|
CAFFE_ENFORCE(
|
offset.dim32(1) == 2 * kernel_h() * kernel_w() * deformable_group_,
|
"Deformable convolution: offset 1st dimension must equal "
|
"2 * kernel_h * kernel_w * deformable_group: 2 * ",
|
kernel_h(),
|
" * ",
|
kernel_w(),
|
" * ",
|
deformable_group_);
|
|
CAFFE_ENFORCE_EQ(
|
offset.dim32(2),
|
(X.dim32(2) + pad_t() + pad_b() - (dilation_h() * (kernel_h() - 1) + 1)) /
|
stride_h() +
|
1);
|
CAFFE_ENFORCE_EQ(
|
offset.dim32(3),
|
(X.dim32(3) + pad_l() + pad_r() - (dilation_w() * (kernel_w() - 1) + 1)) /
|
stride_w() +
|
1);
|
|
int kernel_dims_size = 1;
|
for (int i = 0; i < kernel_.size(); ++i) {
|
CAFFE_ENFORCE(filter.dim32(i + 2) == kernel_[i]);
|
kernel_dims_size *= kernel_[i];
|
}
|
|
CAFFE_ENFORCE(M % group_ == 0);
|
auto* dfilter = Output(FILTER_GRAD, filter.sizes(), at::dtype<T>());
|
auto* doffset = Output(OFFSET_GRAD, offset.sizes(), at::dtype<T>());
|
|
// The dimension of each kernel
|
const int kernel_dim = C / group_ * kernel_dims_size;
|
// The offset corresponding to a single input image, and a single output
|
// image.
|
const int input_offset = C / group_ * input_image_size;
|
const int output_offset = M / group_ * output_image_size;
|
const int offset_offset = offset.numel() / offset.dim32(0);
|
const int filter_offset = filter.numel() / group_;
|
|
// The col buffer is stored in CHW order as well - kernel_dim, and the
|
// height and width.
|
vector<int64_t> img_shape;
|
img_shape.assign(X.sizes().begin() + 1, X.sizes().end());
|
vector<int64_t> col_buffer_shape;
|
col_buffer_shape.push_back(C * kernel_dims_size);
|
col_buffer_shape.insert(
|
col_buffer_shape.end(), output_dims.begin(), output_dims.end());
|
ReinitializeTensor(
|
&col_buffer_,
|
col_buffer_shape,
|
at::dtype<T>().device(Context::GetDeviceType()));
|
|
const int col_buffer_offset = col_buffer_.numel() / group_;
|
|
const T* Xdata = X.template data<T>();
|
const T* filter_data = filter.template data<T>();
|
const T* offset_data = offset.template data<T>();
|
const T* dYdata = dY.template data<T>();
|
T* col_buffer_data = col_buffer_.template mutable_data<T>();
|
T* dfilter_data = dfilter->template mutable_data<T>();
|
T* doffset_data = doffset->template mutable_data<T>();
|
|
// Pre-setting the gradients to zero.
|
math::Set<T, Context>(dfilter->numel(), 0, dfilter_data, &context_);
|
|
T* dbias_data = nullptr;
|
if (!no_bias_) {
|
|
auto* dbias = Output(BIAS_OR_INPUT_GRAD, {M}, at::dtype<T>());
|
if (bias_multiplier_.numel() != output_image_size) {
|
// If the helper bias multiplier is not M, reshape and fill it with one.
|
ReinitializeTensor(
|
&bias_multiplier_,
|
vector<int64_t>(1, output_image_size),
|
at::dtype<T>().device(Context::GetDeviceType()));
|
math::Set<T, Context>(
|
output_image_size,
|
static_cast<T>(1),
|
bias_multiplier_.template mutable_data<T>(),
|
&context_);
|
}
|
dbias_data = dbias->template mutable_data<T>();
|
math::Set<T, Context>(dbias->numel(), 0, dbias_data, &context_);
|
}
|
|
T* dXdata = nullptr;
|
if (OutputSize() == 4 || (no_bias_ && (OutputSize() == 3))) {
|
|
auto* dX = Output(no_bias_ ? BIAS_OR_INPUT_GRAD : INPUT_GRAD, X.sizes(), at::dtype<T>());
|
dXdata = dX->template mutable_data<T>();
|
math::Set<T, Context>(dX->numel(), 0, dXdata, &context_);
|
}
|
|
for (int image_id = 0; image_id < N; ++image_id) {
|
for (int group_id = 0; group_id < group_; ++group_id) {
|
math::Gemm<T, Context>(
|
CblasTrans,
|
CblasNoTrans,
|
kernel_dim,
|
output_image_size,
|
M / group_,
|
1,
|
filter_data + group_id * filter_offset,
|
dYdata + group_id * output_offset,
|
0,
|
col_buffer_data + group_id * col_buffer_offset,
|
&context_);
|
}
|
|
// Gradient with respect to offsets
|
DeformableCol2imCoord(
|
col_buffer_data,
|
Xdata,
|
offset_data,
|
X.sizes(),
|
col_buffer_shape,
|
doffset_data);
|
|
// Gradient with respect to input data
|
if (dXdata) {
|
DeformableCol2im(
|
col_buffer_data, offset_data, X.sizes(), col_buffer_shape, dXdata);
|
dXdata += input_offset * group_;
|
}
|
|
// Gradient with respect to filter
|
DeformableIm2col(
|
Xdata, offset_data, X.sizes(), col_buffer_shape, col_buffer_data);
|
|
for (int group_id = 0; group_id < group_; ++group_id) {
|
math::Gemm<T, Context>(
|
CblasNoTrans,
|
CblasTrans,
|
M / group_,
|
kernel_dim,
|
output_image_size,
|
1,
|
dYdata + group_id * output_offset,
|
col_buffer_data + group_id * col_buffer_offset,
|
1,
|
dfilter_data + group_id * filter_offset,
|
&context_);
|
}
|
|
// Gradient with respect to bias
|
if (dbias_data) {
|
math::Gemv<T, Context>(
|
CblasNoTrans,
|
M,
|
output_image_size,
|
1,
|
dYdata,
|
bias_multiplier_.template data<T>(),
|
1,
|
dbias_data,
|
&context_);
|
}
|
|
Xdata += input_offset * group_;
|
dYdata += output_offset * group_;
|
offset_data += offset_offset;
|
doffset_data += offset_offset;
|
}
|
|
return true;
|
}
|
} // namespace caffe2
|
|
#endif // CAFFE2_OPERATORS_DEFORM_CONV_OP_IMPL_H_
|