#ifndef CAFFE2_OPERATORS_LOCALLY_CONNECTED_OP_H_
#define CAFFE2_OPERATORS_LOCALLY_CONNECTED_OP_H_

#include <vector>

#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"
#include "caffe2/operators/conv_op_shared.h"
#include "caffe2/operators/conv_pool_op_base.h"
#include "caffe2/operators/locally_connected_op_util.h"
namespace caffe2 {
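// LocallyConnectedOp computes a locally connected layer: the sliding-window
// geometry (kernel, stride, pads, dilation, handled by ConvPoolOpBase) is the
// same as for a convolution, but the filter weights are not shared across
// output positions; every output spatial location has its own filter bank, so
// the filter blob is correspondingly larger than a convolution filter.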
template <typename T, class Context>
class LocallyConnectedOp final : public ConvPoolOpBase<Context> {
 public:
  USE_CONV_POOL_BASE_FUNCTIONS(Context);

  template <class... Args>
  explicit LocallyConnectedOp(Args&&... args)
      : ConvPoolOpBase<Context>(std::forward<Args>(args)...) {
    // Since this is the default locally connected implementation, we will
    // use CAFFE_ENFORCE instead of OPERATOR_NEEDS_FEATURE.
    CAFFE_ENFORCE(
        group_ == 1 || order_ == StorageOrder::NCHW,
        "Group locally connected only supports NCHW order right now.");
  }

  ~LocallyConnectedOp() = default;

  bool RunOnDeviceWithOrderNCHW() override;
  bool RunOnDeviceWithOrderNHWC() override;

 private:
  void RunOnDeviceWithOrderNCHWImpl(
      const lc_op_util::ShapeParams& shape,
      const T* X_data,
      const T* filter_data,
      const T* bias_data,
      T* Y_data,
      Tensor* column_buffer,
      Tensor* column_transposed_buffer,
      Tensor* output_buffer);

  void RunOnDeviceWithOrderNHWCImpl(
      const lc_op_util::ShapeParams& shape,
      const T* X_data,
      const T* filter_data,
      const T* bias_data,
      T* Y_data,
      Tensor* column_buffer,
      Tensor* column_transposed_buffer,
      Tensor* Y_transposed_buffer);

  // Vector of ones used to broadcast the bias over the output via GEMM.
  Tensor bias_multiplier_{Context::GetDeviceType()};

  // Scratch buffers for the im2col-style unfolding of X and its transposed
  // layout used by the per-location GEMMs.
  Tensor column_buffer_{Context::GetDeviceType()};
  Tensor column_transposed_buffer_{Context::GetDeviceType()};
  Tensor Y_transposed_buffer_{Context::GetDeviceType()};

  // Input: X, W, b
  // Output: Y
  INPUT_TAGS(INPUT, FILTER, BIAS);
};
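// A minimal CPU usage sketch (a sketch only, not canonical usage: it assumes
// the operator is registered under the type name "LC" for float/CPUContext in
// the corresponding .cc file, and uses CreateOperatorDef/AddArgument from
// caffe2/utils/proto_utils.h):
//
//   Workspace ws;
//   // ... fill the "X", "W" and "b" blobs in ws ...
//   OperatorDef def =
//       CreateOperatorDef("LC", "lc", {"X", "W", "b"}, {"Y"});
//   AddArgument<int>("kernel", 3, &def);
//   std::unique_ptr<OperatorBase> op = CreateOperator(def, &ws);
//   op->Run();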
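// LocallyConnectedGradientOp takes X, W and dY and computes the filter
// gradient dW, the bias gradient db (unless no_bias is set), and, if a
// further output is requested, the input gradient dX.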
template <typename T, class Context>
class LocallyConnectedGradientOp final : public ConvPoolOpBase<Context> {
 public:
  USE_CONV_POOL_BASE_FUNCTIONS(Context);

  template <class... Args>
  explicit LocallyConnectedGradientOp(Args&&... args)
      : ConvPoolOpBase<Context>(std::forward<Args>(args)...),
        OP_SINGLE_ARG(bool, "no_bias", no_bias_, false) {
    CAFFE_ENFORCE(
        !(no_bias_ && OutputSize() == 3),
        "If bias is not present, you should not have 3 grad outputs.");
    CAFFE_ENFORCE(
        group_ == 1 || order_ == StorageOrder::NCHW,
        "Group locally connected only supports NCHW order right now.");
  }

  ~LocallyConnectedGradientOp() = default;

  bool RunOnDeviceWithOrderNCHW() override;
  bool RunOnDeviceWithOrderNHWC() override;

 private:
  void RunOnDeviceWithOrderNCHWImpl(
      const lc_op_util::ShapeParams& shape,
      const T* X_data,
      const T* filter_data,
      const T* dY_data,
      T* dfilter_data,
      T* dX_data,
      T* dbias_data,
      Tensor* column_buffer,
      Tensor* column_transposed_buffer,
      Tensor* dY_transposed_buffer);

  void RunOnDeviceWithOrderNHWCImpl(
      const lc_op_util::ShapeParams& shape,
      const T* X_data,
      const T* filter_data,
      const T* dY_data,
      T* dfilter_data,
      T* dX_data,
      T* dbias_data,
      Tensor* column_buffer,
      Tensor* column_transposed_buffer,
      Tensor* dY_transposed_buffer);

  const bool no_bias_;

  // Vector of ones used to reduce dY over the output via GEMM when computing
  // the bias gradient.
  Tensor bias_multiplier_{Context::GetDeviceType()};

  // Scratch buffers mirroring those of the forward pass.
  Tensor column_buffer_{Context::GetDeviceType()};
  Tensor column_transposed_buffer_{Context::GetDeviceType()};
  Tensor dY_transposed_buffer_{Context::GetDeviceType()};

  // Input: X, W, dY
  // Output: dW, db, and optionally dX
  INPUT_TAGS(INPUT, FILTER, OUTPUT_GRAD);
  OUTPUT_TAGS(FILTER_GRAD, BIAS_OR_INPUT_GRAD, INPUT_GRAD);
};
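// The gradient op is normally emitted by a GradientMaker when building the
// gradient of a net rather than created by hand. A hand-written def would
// mirror the forward sketch above, e.g. (assuming the "LCGradient" type name
// used in the corresponding .cc file):
//
//   OperatorDef gdef = CreateOperatorDef(
//       "LCGradient", "lc_grad", {"X", "W", "dY"}, {"dW", "db", "dX"});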
} // namespace caffe2

#endif // CAFFE2_OPERATORS_LOCALLY_CONNECTED_OP_H_