#ifndef CAFFE2_OPERATORS_TILE_OP_H_ #define CAFFE2_OPERATORS_TILE_OP_H_ #include #include #include #include #include "caffe2/core/common_omp.h" #include "caffe2/core/context.h" #include "caffe2/core/logging.h" #include "caffe2/core/operator.h" #include "caffe2/utils/eigen_utils.h" #include "caffe2/utils/math.h" namespace caffe2 { // Copy a Blob n times along a specified axis. template class TileOp final : public Operator { public: USE_OPERATOR_CONTEXT_FUNCTIONS; template explicit TileOp(Args&&... args) : Operator(std::forward(args)...), OP_SINGLE_ARG(std::int32_t, "tiles", tiles_, 1), OP_SINGLE_ARG(std::int32_t, "axis", axis_, 0) {} bool RunOnDevice() override { return DispatchHelper< TensorTypes>:: call(this, Input(0)); } template bool DoRunWithType() { if (InputSize() > 1) { // We potentially have tiles and/or axis specified as inputs // as well. We will check for them in that order. In other words: // InputSize() == 2: tiles is specified // InputSize() == 3: tiles is specified and axis. // Anything specified as input will override the arguments CAFFE_ENFORCE( Input(1).dim() == 1 && Input(1).numel() == 1, "Input `tiles` should be a vector of size 1."); tiles_ = GetArgFromTensor(Input(1)); // Because of a bug in original code, temporarily adds this part to keep // backward compatibility. // TODO(yangxm): Remove this part when prod runtime upgraded with fixed // model config. if (Input(1).template IsType()) { axis_ = 0; } if (InputSize() > 2) { CAFFE_ENFORCE( Input(2).dim() == 1 && Input(2).numel() == 1, "Input `axis` should be a vector of size 1."); axis_ = GetArgFromTensor(Input(2)); } else { CAFFE_ENFORCE( OperatorBase::HasArgument("axis"), "Argument `axis` is missing and was not specified as input."); } } else { CAFFE_ENFORCE( OperatorBase::HasArgument("tiles"), "Argument `tiles` is missing and was not specified as input."); CAFFE_ENFORCE( OperatorBase::HasArgument("axis"), "Argument `axis` is missing and was not specified as input."); } const auto& X = Input(0); auto* Y = Output(0); const int axis = X.canonical_axis_index(axis_); // reshape output to be input tiled along the axis std::vector Y_dims = X.sizes().vec(); Y_dims[axis] *= tiles_; Y->Resize(Y_dims); // size up to (and not including) axis const int outer_size = X.size_to_dim(axis); // size from axis up const int inner_size = X.size_from_dim(axis); const T* X_data = X.template data(); T* Y_data = Y->template mutable_data(); return DoTile(outer_size, inner_size, X_data, Y_data); } private: std::int32_t GetArgFromTensor(const Tensor& tensor) { CAFFE_ENFORCE( tensor.IsType() || tensor.IsType()); std::int32_t val = -1; if (tensor.IsType()) { context_.template CopyToCPU( 1, tensor.data(), &val); } else if (tensor.IsType()) { std::int64_t val_int64; context_.template CopyToCPU( 1, tensor.data(), &val_int64); val = static_cast(val_int64); } return val; } template bool DoTile(const int outer_size, const int inner_size, const T* X, T* Y) { if (inner_size == 1) { EigenArrayMap Y_arr(Y, tiles_, outer_size); for (int i = 0; i < outer_size; ++i) { Y_arr.col(i) = X[i]; } } else { ConstEigenArrayMap X_arr(X, inner_size, outer_size); for (int i = 0; i < outer_size; ++i) { EigenArrayMap(Y + i * tiles_ * inner_size, inner_size, tiles_) .colwise() = X_arr.col(i); } } return true; } std::int32_t tiles_; std::int32_t axis_; }; template class TileGradientOp final : public Operator { public: USE_OPERATOR_CONTEXT_FUNCTIONS; template explicit TileGradientOp(Args&&... args) : Operator(std::forward(args)...), OP_SINGLE_ARG(std::int32_t, "tiles", tiles_, 1), OP_SINGLE_ARG(std::int32_t, "axis", axis_, 0) {} bool RunOnDevice() override { return DispatchHelper< TensorTypes>:: call(this, Input(0)); } template bool DoRunWithType() { if (InputSize() > 1) { // We potentially have tiles and/or axis specified as inputs // as well. We will check for them in that order. In other words: // InputSize() == 2: tiles is specified // InputSize() == 3: tiles is specified and axis. // Anything specified as input will override the arguments CAFFE_ENFORCE( Input(1).dim() == 1 && Input(1).numel() == 1, "Input `tiles` should be a vector of size 1."); tiles_ = GetArgFromTensor(Input(1)); if (InputSize() > 2) { CAFFE_ENFORCE( Input(2).dim() == 1 && Input(2).numel() == 1, "Input `axis` should be a vector of size 1."); axis_ = GetArgFromTensor(Input(2)); } else { CAFFE_ENFORCE( OperatorBase::HasArgument("axis"), "Argument `axis` is missing and was not specified as input."); } } else { CAFFE_ENFORCE( OperatorBase::HasArgument("tiles"), "Argument `tiles` is missing and was not specified as input."); CAFFE_ENFORCE( OperatorBase::HasArgument("axis"), "Argument `axis` is missing and was not specified as input."); } const auto& dY = Input(0); auto* dX = Output(0); const int axis = dY.canonical_axis_index(axis_); // reshape output to be input "untiled" along the axis std::vector X_dims = dY.sizes().vec(); CAFFE_ENFORCE_EQ(X_dims[axis] % tiles_, 0); X_dims[axis] /= tiles_; dX->Resize(X_dims); // size up to (and not including) axis const int outer_size = dX->size_to_dim(axis); // size from axis up const int inner_size = dX->size_from_dim(axis); /** * How this works: * Imagine a 2D tensor (matrix) of size 3x10, tiled 2 times along axis 1 * (column). * This is equivalent to multiplying by a vector of 1s transposed. * The gradient of this is all 1s in the shape of the input matrix * (call it X). * So the output gradient should be the matrix multipication result * of input gradient (gradient of tiled tensor output) and X. */ const T* dY_data = dY.template data(); T* dX_data = dX->template mutable_data(); return DoTileGradient(outer_size, inner_size, dY_data, dX_data); } private: std::int32_t GetArgFromTensor(const Tensor& tensor) { CAFFE_ENFORCE( tensor.IsType() || tensor.IsType()); std::int32_t val = -1; if (tensor.IsType()) { context_.template CopyToCPU( 1, tensor.data(), &val); } else if (tensor.IsType()) { std::int64_t val_int64; context_.template CopyToCPU( 1, tensor.data(), &val_int64); val = static_cast(val_int64); } return val; } template bool DoTileGradient( const int outer_size, const int inner_size, const T* dY, T* dX) { if (inner_size == 1) { const std::array dY_dims = {outer_size, tiles_}; const std::array dX_dims = {outer_size, 1}; math::ReduceSum( 2, dY_dims.data(), dX_dims.data(), T(1), dY, dX, &context_); } else { math::CopyMatrix( outer_size, inner_size, dY, inner_size * tiles_, dX, inner_size, &context_); for (int i = 0; i < outer_size; ++i) { const T* dY_ptr = dY + i * tiles_ * inner_size; T* dX_ptr = dX + i * inner_size; for (int j = 1; j < tiles_; ++j) { math::Add( inner_size, dX_ptr, dY_ptr + j * inner_size, dX_ptr, &context_); } } } return true; } std::int32_t tiles_; std::int32_t axis_; Tensor ones_; }; } // namespace caffe2 #endif // CAFFE2_OPERATORS_TILE_OP_H_