#ifndef CAFFE2_OPERATORS_SPARSE_TO_DENSE_OP_H_ #define CAFFE2_OPERATORS_SPARSE_TO_DENSE_OP_H_ #include "caffe2/core/context.h" #include "caffe2/core/operator.h" #include "caffe2/utils/math.h" namespace caffe2 { template class SparseToDenseOp final : public Operator { public: USE_OPERATOR_CONTEXT_FUNCTIONS; USE_DISPATCH_HELPER; template explicit SparseToDenseOp(Args&&... args) : Operator(std::forward(args)...), output_first_dim_( this->template GetSingleArgument("output_first_dim", 0)) {} bool RunOnDevice() override { return DispatchHelper>::call( this, Input(INDICES)); } private: template int GetOutputFirstDim( const TInd* sparse_indices_vec, const int32_t sparse_indices_len) { if (output_first_dim_ > 0) { CAFFE_ENFORCE_EQ(InputSize(), 2); return output_first_dim_; } if (InputSize() == 3) { auto& data_to_infer_dim = Input(DATA_TO_INFER_DIM); CAFFE_ENFORCE_GE(data_to_infer_dim.dim(), 1); return data_to_infer_dim.dim32(0); } if (sparse_indices_len <= 0) { return 0; } // Awkward way to get the max element to make it work with both CUDA // and CPU. ReinitializeTensor(&max_element_, {1}, at::dtype().device(Context::GetDeviceType())); TInd* max_element_ptr = max_element_.template mutable_data(); math::ReduceMax(sparse_indices_len, sparse_indices_vec, max_element_ptr, &scratch_, &context_); max_element_host_.CopyFrom(max_element_); return 1 + max_element_host_.template data()[0]; } template bool DoRunWithType() { return DispatchHelper< TensorTypes2< float, int32_t, int64_t, GenericTensorImplementation>, TInd>::call(this, Input(VALUES)); } template bool DoRunWithType2() { auto& sparse_indices = Input(INDICES); CAFFE_ENFORCE_EQ(sparse_indices.dim(), 1); auto& sparse_values = Input(VALUES); CAFFE_ENFORCE_GE(sparse_values.dim(), 1); CAFFE_ENFORCE_EQ(sparse_indices.numel(), sparse_values.size(0)); const TInd* sparse_indices_vec = sparse_indices.template data(); const int32_t sparse_indices_len = sparse_indices.dim32(0); const int output_first_dim = GetOutputFirstDim(sparse_indices_vec, sparse_indices_len); auto shape = sparse_values.sizes().vec(); shape[0] = output_first_dim; auto* output = Output(0, shape, at::dtype()); TData* output_data = output->template mutable_data(); if (!output_first_dim) { return true; } memset(output_data, 0, output->nbytes()); const auto block_nitems = sparse_values.size_from_dim(1); const TData* sparse_values_vec = sparse_values.template data(); for (int32_t i = 0; i < sparse_indices_len; i++) { const TInd idx = sparse_indices_vec[i]; CAFFE_ENFORCE_GE(idx, 0); CAFFE_ENFORCE_LT(idx, output_first_dim); math::Add( block_nitems, output_data + idx * block_nitems, sparse_values_vec + i * block_nitems, output_data + idx * block_nitems, &context_); } return true; } template bool DoRunWithOtherType2() { CAFFE_THROW( "SparseToDense is not implemented on tensor of type ", Input(VALUES).dtype().name(), "consider adding it as a type in the DispatchHelper list or " "implementing a generic version (which won't work for " "duplicated indices though)"); } private: int output_first_dim_; Tensor scratch_{Context::GetDeviceType()}; Tensor max_element_host_{CPU}; Tensor max_element_; INPUT_TAGS(INDICES, VALUES, DATA_TO_INFER_DIM); }; } // namespace caffe2 #endif // CAFFE2_OPERATORS_SPARSE_TO_DENSE_OP_H_