#ifndef CAFFE2_OPERATORS_FILLER_OP_H_
#define CAFFE2_OPERATORS_FILLER_OP_H_

#include <cmath>
#include <functional>
#include <numeric>

#include "caffe2/core/context.h"
#include "caffe2/core/logging.h"
#include "caffe2/core/operator.h"
#include "caffe2/utils/math.h"

namespace caffe2 {

// FillerOp takes in either zero or one input.
//
// If the number of inputs is 1, the output shape will be identical to that of
// the input at run time, with optional additional dimensions appended at the
// end as specified by the "extra_shape" argument. In that case the "shape"
// argument must not be set.
//
// If the number of inputs is 0, the full shape must be provided via the
// "shape" argument.
template <class Context>
class FillerOp : public Operator<Context> {
 public:
  template <class... Args>
  explicit FillerOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...),
        shape_(this->template GetRepeatedArgument<int64_t>("shape")),
        extra_shape_(ToVectorint64_t(
            this->template GetRepeatedArgument<int>("extra_shape"))),
        input_as_shape_(
            this->template GetSingleArgument<bool>("input_as_shape", false)) {
    if (InputSize()) {
      if (shape_.size() != 0) {
        CAFFE_THROW(
            "Cannot set the shape argument and pass in an input at "
            "the same time");
      }
    } else {
      if (!extra_shape_.empty()) {
        CAFFE_THROW("Cannot set extra_shape when there is no input");
      }
      if (input_as_shape_) {
        CAFFE_THROW("An input must be given if input_as_shape is true");
      }
      if (shape_.size() == 0 &&
          this->template HasSingleArgumentOfType<int>("shape")) {
        CAFFE_THROW("Fill 'shape' argument was a scalar, list expected");
      }
    }
  }

  virtual ~FillerOp() {}
  USE_OPERATOR_CONTEXT_FUNCTIONS;

  bool RunOnDevice() override {
    auto* output = Operator<Context>::Output(0);
    if (InputSize()) {
      auto shape = vector<int64_t>{};
      if (input_as_shape_) {
        if (this->InputIsTensorType(0, CPU)) {
          // Originally, the shape input had to live in CPU context.
          auto& input = this->template Input<Tensor>(0, CPU);
          CAFFE_ENFORCE_EQ(
              input.dim(),
              1,
              "When input_as_shape is true, the input must be a 1D tensor of "
              "data type int64_t");
          CAFFE_ENFORCE(input.numel() > 0);
          auto* shape_data = input.template data<int64_t>();
          shape.insert(shape.end(), shape_data, shape_data + input.dim32(0));
        } else {
          // In the ONNX case, we allow the shape to be in CUDA context.
          auto& input = Input(0);
          CAFFE_ENFORCE_EQ(
              input.dim(),
              1,
              "When input_as_shape is true, the input must be a 1D tensor of "
              "data type int64_t");
          CAFFE_ENFORCE(input.numel() > 0);
          auto* shape_data = input.template data<int64_t>();
          std::unique_ptr<int64_t[]> shape_data_copy =
              caffe2::make_unique<int64_t[]>(input.dim32(0));
          context_.template CopyToCPU<int64_t>(
              input.dim32(0), shape_data, shape_data_copy.get());
          shape.insert(
              shape.end(),
              shape_data_copy.get(),
              shape_data_copy.get() + input.dim32(0));
        }
      } else {
        auto& input = Input(0);
        shape.insert(shape.end(), input.sizes().begin(), input.sizes().end());
      }
      shape.insert(shape.end(), extra_shape_.begin(), extra_shape_.end());
      output->Resize(shape);
    } else {
      output->Resize(shape_);
    }
    return Fill(output);
  }

  virtual bool Fill(Tensor* output) = 0;

 protected:
  vector<int64_t> shape_;
  vector<int64_t> extra_shape_;
  bool input_as_shape_;
};
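// A minimal usage sketch of the two shape modes above, in NetDef text format.
// The blob names "data" and "out" and the choice of ConstantFill as the
// concrete filler are illustrative, not part of this header:
//
//   # No inputs: the full output shape comes from the "shape" argument.
//   op {
//     type: "ConstantFill"
//     output: "out"
//     arg { name: "shape" ints: 2 ints: 3 }
//     arg { name: "value" f: 1.0 }
//   }
//
//   # One input: the output copies the input's shape at run time, with the
//   # "extra_shape" dims appended; the "shape" argument must not be set.
//   op {
//     type: "ConstantFill"
//     input: "data"
//     output: "out"
//     arg { name: "extra_shape" ints: 4 }
//   }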
template <typename T, class Context>
class UniformFillOp final : public FillerOp<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  template <class... Args>
  explicit UniformFillOp(Args&&... args)
      : FillerOp<Context>(std::forward<Args>(args)...),
        min_(this->template GetSingleArgument<T>("min", 0)),
        max_(this->template GetSingleArgument<T>("max", 1)) {
    if (InputSize() == 3) {
      CAFFE_ENFORCE(
          !this->template HasSingleArgumentOfType<T>("min"),
          "Cannot set both min arg and min input blob");
      CAFFE_ENFORCE(
          !this->template HasSingleArgumentOfType<T>("max"),
          "Cannot set both max arg and max input blob");
    } else {
      CAFFE_ENFORCE_LT(
          min_, max_, "Max value should be bigger than min value.");
    }
  }

  bool Fill(Tensor* output) override {
    T min = min_;
    T max = max_;
    if (InputSize() == 3) {
      CAFFE_ENFORCE_EQ(1, Input(1).numel(), "min blob must be scalar");
      CAFFE_ENFORCE_EQ(1, Input(2).numel(), "max blob must be scalar");
      min = *Input(1).template data<T>();
      max = *Input(2).template data<T>();
      if (min > max) {
        // Empty range: emit an empty tensor rather than failing.
        auto shape = output->sizes().vec();
        shape[0] = 0;
        output->Resize(shape);
        output->template mutable_data<T>();
        return true;
      }
    }
    math::RandUniform<T, Context>(
        output->numel(),
        min,
        max,
        output->template mutable_data<T>(),
        &context_);
    return true;
  }

 private:
  T min_;
  T max_;
};
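// A sketch of the three-input form of UniformFill above (blob names are
// hypothetical): inputs 1 and 2 are one-element tensors that supply the range
// at run time, so the "min"/"max" args must not be set. When min > max the op
// resizes the first dimension to 0 and returns an empty tensor instead of
// failing.
//
//   op {
//     type: "UniformFill"
//     input: "shape_tensor"  # 1D int64 tensor holding the output dims
//     input: "min"           # one-element tensor of T
//     input: "max"           # one-element tensor of T
//     output: "out"
//     arg { name: "input_as_shape" i: 1 }
//   }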
template <class Context>
class UniqueUniformFillOp final : public FillerOp<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  template <class... Args>
  explicit UniqueUniformFillOp(Args&&... args)
      : FillerOp<Context>(std::forward<Args>(args)...) {
    TensorProto_DataType dtype = static_cast<TensorProto_DataType>(
        this->template GetSingleArgument<int>(
            "dtype", TensorProto_DataType_INT32));

    switch (dtype) {
      case TensorProto_DataType_INT32:
        CheckRange<int>();
        body_ = &UniqueUniformFillOp::FillWithType<int>;
        break;
      case TensorProto_DataType_INT64:
        CheckRange<int64_t>();
        body_ = &UniqueUniformFillOp::FillWithType<int64_t>;
        break;
      case TensorProto_DataType_UNDEFINED:
        CAFFE_THROW(
            "UniqueUniformFill op cannot have undefined 'dtype' argument");
      // break;
      default:
        CAFFE_THROW("Unexpected 'dtype' argument value: ", dtype);
    }
  }

  bool Fill(Tensor* output) override {
    return (this->*body_)(output);
  }

 private:
  template <typename T>
  void CheckRange() {
    CAFFE_ENFORCE(this->template HasSingleArgumentOfType<T>("min"));
    CAFFE_ENFORCE(this->template HasSingleArgumentOfType<T>("max"));
    CAFFE_ENFORCE_LT(
        this->template GetSingleArgument<T>("min", 0),
        this->template GetSingleArgument<T>("max", 0),
        "Max value should be bigger than min value.");
  }

  template <typename T>
  bool FillWithType(Tensor* output) {
    T min = this->template GetSingleArgument<T>("min", 0);
    T max = this->template GetSingleArgument<T>("max", 0);

    // Optional second input: values the generated numbers must avoid.
    const T* avoid_data = nullptr;
    size_t avoid_size = 0;
    if (InputSize() >= 2) {
      auto& avoid = Input(1);
      avoid_data = avoid.template data<T>();
      avoid_size = avoid.numel();
    }
    math::RandUniformUnique<T, Context>(
        output->numel(),
        min,
        max,
        output->template mutable_data<T>(),
        avoid_size,
        avoid_data,
        &context_);
    return true;
  }

  bool (UniqueUniformFillOp::*body_)(Tensor* output);
};

template <class Context>
class ConstantFillOp final : public FillerOp<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  template <class... Args>
  explicit ConstantFillOp(Args&&... args)
      : FillerOp<Context>(std::forward<Args>(args)...) {
    TensorProto_DataType dtype = static_cast<TensorProto_DataType>(
        this->template GetSingleArgument<int>(
            "dtype", TensorProto_DataType_FLOAT));

    if (!OperatorBase::HasArgument("dtype") &&
        OperatorBase::HasArgument("value")) {
      // If 'dtype' is not provided, infer type based on the type of 'value'.
      // Currently, a single argument contains either float, int64 or bytes.
      if (this->template HasSingleArgumentOfType<float>("value")) {
        dtype = TensorProto_DataType_FLOAT;
      } else if (this->template HasSingleArgumentOfType<int64_t>("value")) {
        dtype = TensorProto_DataType_INT64;
      } else {
        CAFFE_THROW("Argument 'value' is of unexpected type");
      }
      VLOG(1) << "Argument 'dtype' is not provided. Assume the data type is "
              << "the same as that of argument 'value': " << dtype;
    }

    switch (dtype) {
      case TensorProto_DataType_FLOAT:
        body_ = &ConstantFillOp::FillWithType<float>;
        break;
      case TensorProto_DataType_DOUBLE:
        body_ = &ConstantFillOp::FillWithType<double>;
        break;
      case TensorProto_DataType_BOOL:
        body_ = &ConstantFillOp::FillWithType<bool>;
        break;
      case TensorProto_DataType_INT8:
        body_ = &ConstantFillOp::FillWithType<int8_t>;
        break;
      case TensorProto_DataType_INT16:
        body_ = &ConstantFillOp::FillWithType<int16_t>;
        break;
      case TensorProto_DataType_INT32:
        body_ = &ConstantFillOp::FillWithType<int>;
        break;
      case TensorProto_DataType_INT64:
        body_ = &ConstantFillOp::FillWithType<int64_t>;
        break;
      case TensorProto_DataType_UINT8:
        body_ = &ConstantFillOp::FillWithType<uint8_t>;
        break;
      case TensorProto_DataType_UINT16:
        body_ = &ConstantFillOp::FillWithType<uint16_t>;
        break;
      case TensorProto_DataType_STRING:
        body_ = &ConstantFillOp::FillWithString;
        break;
      case TensorProto_DataType_UNDEFINED:
        CAFFE_THROW("ConstantFill op cannot have undefined 'dtype' argument");
      // break;
      default:
        CAFFE_THROW("Unexpected 'dtype' argument value: ", dtype);
    }
  }

  bool Fill(Tensor* output) override {
    return (this->*body_)(output);
  }

  template <typename T>
  bool FillWithType(Tensor* output) {
    T value = this->template GetSingleArgument<T>("value", 0);
    auto* data = output->template mutable_data<T>();
    if (output->numel()) {
      math::Set<T, Context>(output->numel(), value, data, &context_);
    }
    return true;
  }

  bool FillWithString(Tensor* output) {
    auto value = this->template GetSingleArgument<std::string>("value", "");
    auto* data = output->template mutable_data<std::string>();
    for (int i = 0; i < output->numel(); ++i) {
      data[i] = value;
    }
    return true;
  }

 private:
  bool (ConstantFillOp::*body_)(Tensor* output);
};
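// A sketch of the dtype-inference path in ConstantFillOp above (the blob name
// "out" is hypothetical): no "dtype" argument is given, so the constructor
// inspects the type of "value" -- an int64 argument here -- and dispatches to
// FillWithType<int64_t>.
//
//   op {
//     type: "ConstantFill"
//     output: "out"
//     arg { name: "shape" ints: 4 }
//     arg { name: "value" i: 7 }
//   }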
// DiagonalFillOp fills a tensor's main diagonal with 'value'; the per-backend
// FillWithType implementations are defined outside this header.
template <class Context>
class DiagonalFillOp final : public FillerOp<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  template <class... Args>
  explicit DiagonalFillOp(Args&&... args)
      : FillerOp<Context>(std::forward<Args>(args)...) {
    TensorProto_DataType dtype = static_cast<TensorProto_DataType>(
        this->template GetSingleArgument<int>(
            "dtype", TensorProto_DataType_FLOAT));

    if (!OperatorBase::HasArgument("dtype") &&
        OperatorBase::HasArgument("value")) {
      // If 'dtype' is not provided, infer type based on the type of 'value'.
      // Currently, a single argument contains either float, int64 or bytes.
      if (this->template HasSingleArgumentOfType<float>("value")) {
        dtype = TensorProto_DataType_FLOAT;
      } else if (this->template HasSingleArgumentOfType<int64_t>("value")) {
        dtype = TensorProto_DataType_INT64;
      } else {
        CAFFE_THROW("Argument 'value' is of unexpected type");
      }
      VLOG(1) << "Argument 'dtype' is not provided. Assume the data type is "
              << "the same as that of argument 'value': " << dtype;
    }

    switch (dtype) {
      case TensorProto_DataType_FLOAT:
        body_ = &DiagonalFillOp::FillWithType<float>;
        break;
      case TensorProto_DataType_DOUBLE:
        body_ = &DiagonalFillOp::FillWithType<double>;
        break;
      case TensorProto_DataType_BOOL:
        body_ = &DiagonalFillOp::FillWithType<bool>;
        break;
      case TensorProto_DataType_INT8:
        body_ = &DiagonalFillOp::FillWithType<int8_t>;
        break;
      case TensorProto_DataType_INT16:
        body_ = &DiagonalFillOp::FillWithType<int16_t>;
        break;
      case TensorProto_DataType_INT32:
        body_ = &DiagonalFillOp::FillWithType<int>;
        break;
      case TensorProto_DataType_INT64:
        body_ = &DiagonalFillOp::FillWithType<int64_t>;
        break;
      case TensorProto_DataType_UINT8:
        body_ = &DiagonalFillOp::FillWithType<uint8_t>;
        break;
      case TensorProto_DataType_UINT16:
        body_ = &DiagonalFillOp::FillWithType<uint16_t>;
        break;
      case TensorProto_DataType_UNDEFINED:
        CAFFE_THROW("Cannot have undefined 'dtype' argument");
      default:
        CAFFE_THROW("Unexpected 'dtype' argument value: ", dtype);
    }
  }

  bool Fill(Tensor* output) override {
    return (this->*body_)(output);
  }

  template <typename T>
  bool FillWithType(Tensor* output);

 private:
  void VerifyOutputShape(Tensor* output) {
    CAFFE_ENFORCE(output->dim() >= 2, "Input shape must be >= 2D");
  }

  // Step between consecutive diagonal elements in the flattened tensor;
  // requires all dimensions to have equal length.
  int64_t GetStepSize(Tensor* output) {
    int64_t step;
    if (output->dim() == 2) {
      step = output->size(1) + 1;
    } else {
      int64_t prev_i = output->size(0);
      for (auto i : output->sizes()) {
        if (i != prev_i) {
          CAFFE_THROW("All dimensions of input must be of equal length");
        }
      }
      vector<int64_t> cumprod(output->dim());
      auto dims = output->sizes();
      std::partial_sum(
          dims.begin(),
          dims.end() - 1,
          cumprod.begin(),
          std::multiplies<int64_t>());
      step = 1 +
          std::accumulate(
                 cumprod.begin(), cumprod.end(), static_cast<int64_t>(0));
      VLOG(0) << step;
    }
    return step;
  }

  bool (DiagonalFillOp::*body_)(Tensor* output);
};

template <typename T, class Context>
class GaussianFillOp final : public FillerOp<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  template <class... Args>
  explicit GaussianFillOp(Args&&... args)
      : FillerOp<Context>(std::forward<Args>(args)...),
        mean_(this->template GetSingleArgument<float>("mean", 0)),
        std_(this->template GetSingleArgument<float>("std", 1)) {
    DCHECK_GT(std_, 0) << "Standard deviation should be positive.";
  }

  bool Fill(Tensor* output) override {
    math::RandGaussian<T, Context>(
        output->numel(),
        mean_,
        std_,
        output->template mutable_data<T>(),
        &context_);
    return true;
  }

 private:
  T mean_;
  T std_;
};

template <typename T, class Context>
class XavierFillOp final : public FillerOp<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  template <class... Args>
  explicit XavierFillOp(Args&&... args)
      : FillerOp<Context>(std::forward<Args>(args)...) {}

  bool Fill(Tensor* output) override {
    const int fan_in = output->numel() / output->dim32(0);
    T scale = std::sqrt(T(3) / fan_in);
    math::RandUniform<T, Context>(
        output->numel(),
        -scale,
        scale,
        output->template mutable_data<T>(),
        &context_);
    return true;
  }
};

template <typename T, class Context>
class MSRAFillOp final : public FillerOp<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  template <class... Args>
  explicit MSRAFillOp(Args&&... args)
      : FillerOp<Context>(std::forward<Args>(args)...) {}

  bool Fill(Tensor* output) override {
    const int fan_out = output->numel() / output->dim32(1);
    T scale = std::sqrt(T(2) / fan_out);
    math::RandGaussian<T, Context>(
        output->numel(),
        0.0,
        scale,
        output->template mutable_data<T>(),
        &context_);
    return true;
  }
};
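// Why these scales (derivation, for reference): a uniform draw from U(-a, a)
// has variance a^2 / 3, so the Xavier choice a = sqrt(3 / fan_in) gives
// Var = 1 / fan_in. The MSRA fill samples a Gaussian with standard deviation
// sqrt(2 / fan_out), i.e. Var = 2 / fan_out (the He/MSRA initialization).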
// This is mostly used for debugging: it fills a tensor sequentially with the
// values 0, 1, 2, ..., which can then be used to check e.g. reshape
// operations by making the indices easy to read.
template <typename T, class Context>
class RangeFillOp final : public FillerOp<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  template <class... Args>
  explicit RangeFillOp(Args&&... args)
      : FillerOp<Context>(std::forward<Args>(args)...) {}

  bool Fill(Tensor* output) override;
};

// LengthsRangeFillOp: given a vector of lengths [l_0, l_1, ...], outputs the
// concatenation of the ranges [0, ..., l_i - 1].
template <class Context>
class LengthsRangeFillOp : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  USE_SIMPLE_CTOR_DTOR(LengthsRangeFillOp);

  bool RunOnDevice() override {
    auto& input = Input(0);
    auto* input_data = input.template data<int32_t>();

    CAFFE_ENFORCE_EQ(input.dim(), 1, "Input must be a vector.");

    auto len_sum = std::accumulate(input_data, input_data + input.numel(), 0);

    auto* output = Output(0, {len_sum}, at::dtype<int32_t>());
    auto* output_data = output->template mutable_data<int32_t>();

    int32_t offset = 0;
    for (int i = 0; i < input.numel(); ++i) {
      auto len = input_data[i];
      auto start = output_data + offset;
      std::iota(
          start,
          start + len,
          0); // make the third argument the arg of this operator
      offset += len;
    }
    return true;
  }
};

template <int VALUE_TYPE = TensorProto_DataType_FLOAT>
inline std::vector<TensorShape> FillerTensorInference(
    const OperatorDef& def,
    const vector<TensorShape>& in) {
  vector<TensorShape> out(1);
  ArgumentHelper helper(def);
  out[0].set_data_type(static_cast<TensorProto_DataType>(
      helper.GetSingleArgument<int>("dtype", VALUE_TYPE)));

  if (in.size()) {
    // TODO
    bool input_as_shape =
        helper.GetSingleArgument<bool>("input_as_shape", false);
    if (input_as_shape) {
      out[0].set_unknown_shape(true);
      return out;
    }
    for (auto d : in[0].dims()) {
      out[0].add_dims(d);
    }
  } else {
    auto shape = helper.GetRepeatedArgument<int64_t>("shape");
    for (auto d : shape) {
      out[0].add_dims(d);
    }
  }
  return out;
}

} // namespace caffe2

#endif // CAFFE2_OPERATORS_FILLER_OP_H_