// Feature-map merge operators (namespace caffe2): ten operator classes that
// merge per-feature tensors into combined lengths/keys/values tensors, plus
// the matching gradient operators that split a merged gradient back out.
//
// NOTE(review): this copy of the header looks like it went through a lossy
// text extraction -- confirm against the upstream file before building:
//   * whole classes are collapsed onto a few very long physical lines, so the
//     two `//` line comments in the code also swallow whatever follows them on
//     the same physical line;
//   * template parameter/argument lists appear to be missing (for example
//     `template class ... : public Operator`, `std::forward(args)`,
//     `std::vector featureIDs_`, `TensorTypes>::`, `.template data()`).
// The notes below only describe logic that is still readable. Element types
// are stated only where a declared pointer type shows them.
//
// Shared conventions visible in the code:
//   * Forward ops read their inputs as consecutive groups of
//     kNumTensorsPerInput tensors, one group per input feature, and compute
//     numInputs_ = InputSize() / kNumTensorsPerInput in the constructor.
//   * Gradient ops treat the LAST input as the merged gradient and the inputs
//     before it as groups of kNumTensorsPerInput tensors.
//   * numExamples is always Input(0).numel().
//   * Merged output is written example by example and, within one example,
//     input group by input group; running offsets track read/write positions.
//   * RunOnDevice() forwards to DispatchHelper on one chosen input; the Map
//     ops dispatch a second time from DoRunWithType() into DoRunWithType2()
//     (presumably to resolve the key type K and then the value type V).
//
// "Single" operators (each group carries a bool presence tensor indexed by
// example; the "feature_ids" repeated argument supplies one id per group):
//
//   MergeSingleScalarFeatureTensorsOp        group = [values, presence]
//     out0[example] = number of present features (int32_t)
//     out1 = feature ids of the present entries (int64_t)
//     out2 = values[example] of the present entries (T)
//     Dispatches on Input(0).
//
//   MergeSingleScalarFeatureTensorsGradientOp
//     inputs 0..InputSize()-2 = presence tensors, last input = values gradient
//     Output(i) is ResizeLike(Input(i)); each example receives the next unread
//     gradient value when present, otherwise a value-initialized T.
//
//   MergeSingleListFeatureTensorsOp          group = [lengths, values, presence]
//     out0 = per-example feature count, out1 = feature ids,
//     out2 = per-entry value lengths, out3 = concatenated values.
//     A feature's read offset (inValuesOffset_) advances only for examples
//     where the feature is present. Dispatches on Input(1).
//
//   MergeSingleListOrMapFeatureTensorsGradientOp   group = [lengths, presence]
//     Output(i) is resized to the sum of lengths over present examples and is
//     filled with consecutive slices of the last input.
//
//   MergeSingleMapFeatureTensorsOp   group = [lengths, keys, values, presence]
//     out0..out2 as for the list op, out3 = concatenated map keys (K),
//     out4 = concatenated map values (V). Keys and values share one offset.
//     Dispatches on Input(1), then on Input(2).
//
// "Multi" operators (each group carries int32_t lengths per example and an
// int64_t keys tensor; there is no presence tensor and no feature_ids):
//
//   MergeMultiScalarFeatureTensorsOp         group = [lengths, keys, values]
//     out0[example] = sum of lengths[example] over all groups,
//     out1/out2 = keys/values, sized by the summed numel() of the key tensors.
//     Dispatches on Input(2).
//
//   MergeMultiScalarFeatureTensorsGradientOp       group = [lengths]
//     Output(i) is resized to the sum of lengths and filled slice by slice.
//
//   MergeMultiListFeatureTensorsOp
//     group = [lengths, keys, values lengths, values values]
//     out0 = per-example length, out1 = keys, out2 = values lengths,
//     out3 = concatenated values. Dispatches on Input(3).
//
//   MergeMultiMapFeatureTensorsOp
//     group = [lengths, keys, values lengths, values keys, values values]
//     out0..out2 as for the list op, out3 = values keys (K),
//     out4 = values values (V). Dispatches on Input(3), then on Input(4).
//
//   MergeMultiListOrMapFeatureTensorsGradientOp    group = [lengths, values lengths]
//     Output(i) is resized to the sum of all values-lengths entries; for each
//     example the values lengths of its lengths[example] features are summed
//     to get the slice size copied from the last input.
//
// NOTE(review): featureIDs_[inputIndex] is read for every input group with no
// visible check that "feature_ids" has numInputs_ entries; likewise no visible
// check that InputSize() is a multiple of kNumTensorsPerInput -- confirm the
// operator schema enforces both.
// NOTE(review): kNumTensorsPerInput is `const int` everywhere except in
// MergeMultiScalarFeatureTensorsGradientOp and
// MergeMultiListOrMapFeatureTensorsGradientOp, where it is a plain `int`.
// NOTE(review): element counts and offsets are held in `int`; presumably the
// tensors are assumed small enough for that -- confirm.
#ifndef CAFFE2_OPERATORS_FEATURE_MAPS_OPS_H_ #define CAFFE2_OPERATORS_FEATURE_MAPS_OPS_H_ #include "caffe2/core/context.h" #include "caffe2/core/operator.h" namespace caffe2 { template class MergeSingleScalarFeatureTensorsOp : public Operator { public: USE_OPERATOR_CONTEXT_FUNCTIONS; template explicit MergeSingleScalarFeatureTensorsOp(Args&&... args) : Operator(std::forward(args)...) { numInputs_ = InputSize() / kNumTensorsPerInput; featureIDs_ = this->template GetRepeatedArgument("feature_ids"); } virtual ~MergeSingleScalarFeatureTensorsOp() noexcept {} bool RunOnDevice() override { return DispatchHelper< TensorTypes>:: call(this, Input(0)); } template bool DoRunWithType() { int numExamples = Input(0).numel(); int totalNumFeatures = 0; for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) { const bool* inPresenceData = Input(kNumTensorsPerInput * inputIndex + 1).template data(); for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) { if (inPresenceData[exampleIndex]) { ++totalNumFeatures; } } } auto* outLengths = Output(0, {numExamples}, at::dtype()); auto* outKeys = Output(1, {totalNumFeatures}, at::dtype()); auto* outValues = Output(2, {totalNumFeatures}, at::dtype()); int32_t* outLengthsData = outLengths->template mutable_data(); int64_t* outKeysData = outKeys->template mutable_data(); T* outValuesData = outValues->template mutable_data(); int keysOffset = 0; for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) { outLengthsData[exampleIndex] = 0; for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) { const T* inData = Input(kNumTensorsPerInput * inputIndex).template data(); const bool* inPresenceData = Input(kNumTensorsPerInput * inputIndex + 1).template data(); if (inPresenceData[exampleIndex]) { ++outLengthsData[exampleIndex]; outKeysData[keysOffset] = featureIDs_[inputIndex]; outValuesData[keysOffset] = inData[exampleIndex]; ++keysOffset; } } } return true; } private: const int kNumTensorsPerInput = 2; 
// Next physical line: the remaining private members of
// MergeSingleScalarFeatureTensorsOp, all of
// MergeSingleScalarFeatureTensorsGradientOp, and the start of
// MergeSingleListFeatureTensorsOp (up to its constructor initializer).
// NOTE(review): the `// Everything other than values_grad` comment sits
// mid-line there, so as written everything after it on that physical line is
// comment text -- looks like a line-collapsing artifact; confirm.
int numInputs_; std::vector featureIDs_; }; template class MergeSingleScalarFeatureTensorsGradientOp : public Operator { public: USE_OPERATOR_CONTEXT_FUNCTIONS; template explicit MergeSingleScalarFeatureTensorsGradientOp(Args&&... args) : Operator(std::forward(args)...) { numFeatureInputs_ = InputSize() - 1; // Everything other than values_grad } virtual ~MergeSingleScalarFeatureTensorsGradientOp() noexcept {} bool RunOnDevice() override { return DispatchHelper< TensorTypes>:: call(this, Input(InputSize() - 1)); } template bool DoRunWithType() { int numExamples = Input(0).numel(); for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) { Output(inputIndex)->ResizeLike(Input(inputIndex)); } const T* inValuesGradData = Input(InputSize() - 1).template data(); T default_value = T(); int valuesOffset = 0; for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) { for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) { const bool* inPresenceData = Input(inputIndex).template data(); T* outFeatureData = Output(inputIndex)->template mutable_data(); if (inPresenceData[exampleIndex]) { outFeatureData[exampleIndex] = inValuesGradData[valuesOffset]; ++valuesOffset; } else { outFeatureData[exampleIndex] = default_value; } } } return true; } private: int numFeatureInputs_; }; template class MergeSingleListFeatureTensorsOp : public Operator { public: USE_OPERATOR_CONTEXT_FUNCTIONS; template explicit MergeSingleListFeatureTensorsOp(Args&&... args) : Operator(std::forward(args)...) 
// Next physical line: MergeSingleListFeatureTensorsOp constructor body, its
// counting pass (totalNumFeatures / totalNumValues over present examples),
// output allocation, and the start of the example-major merge loop.
{ numInputs_ = InputSize() / kNumTensorsPerInput; inValuesOffset_.resize(numInputs_); featureIDs_ = this->template GetRepeatedArgument("feature_ids"); } virtual ~MergeSingleListFeatureTensorsOp() noexcept {} bool RunOnDevice() override { return DispatchHelper< TensorTypes>:: call(this, Input(1)); } template bool DoRunWithType() { int numExamples = Input(0).numel(); int totalNumFeatures = 0; int totalNumValues = 0; for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) { const int32_t* inLengthsData = Input(kNumTensorsPerInput * inputIndex).template data(); const bool* inPresenceData = Input(kNumTensorsPerInput * inputIndex + 2).template data(); for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) { if (inPresenceData[exampleIndex]) { ++totalNumFeatures; totalNumValues += inLengthsData[exampleIndex]; } } } auto* outLengths = Output(0, {numExamples}, at::dtype()); auto* outKeys = Output(1, {totalNumFeatures}, at::dtype()); auto* outValuesLengths = Output(2, {totalNumFeatures}, at::dtype()); auto* outValuesValues = Output(3, {totalNumValues}, at::dtype()); int32_t* outLengthsData = outLengths->template mutable_data(); int64_t* outKeysData = outKeys->template mutable_data(); int32_t* outValuesLengthsData = outValuesLengths->template mutable_data(); T* outValuesValuesData = outValuesValues->template mutable_data(); int keysOffset = 0; int valuesOffset = 0; for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) { inValuesOffset_[inputIndex] = 0; } for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) { outLengthsData[exampleIndex] = 0; for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) { const int32_t* inLengthsData = Input(kNumTensorsPerInput * inputIndex).template data(); const auto& inValues = Input(kNumTensorsPerInput * inputIndex + 1); const bool* inPresenceData = Input(kNumTensorsPerInput * inputIndex + 2).template data(); if (inPresenceData[exampleIndex]) { ++outLengthsData[exampleIndex]; 
// Next physical line: the rest of the MergeSingleListFeatureTensorsOp merge
// loop (key, value length and value slice are written, then both the output
// offset and this feature's input offset advance by the copied length),
// followed by MergeSingleListOrMapFeatureTensorsGradientOp.
outKeysData[keysOffset] = featureIDs_[inputIndex]; outValuesLengthsData[keysOffset] = inLengthsData[exampleIndex]; context_.CopyItemsSameDevice( inValues.dtype(), inLengthsData[exampleIndex], &inValues.template data()[inValuesOffset_[inputIndex]], &outValuesValuesData[valuesOffset]); valuesOffset += inLengthsData[exampleIndex]; inValuesOffset_[inputIndex] += inLengthsData[exampleIndex]; ++keysOffset; } } } return true; } private: const int kNumTensorsPerInput = 3; int numInputs_; std::vector inValuesOffset_; std::vector featureIDs_; }; template class MergeSingleListOrMapFeatureTensorsGradientOp : public Operator { public: USE_OPERATOR_CONTEXT_FUNCTIONS; template explicit MergeSingleListOrMapFeatureTensorsGradientOp(Args&&... args) : Operator(std::forward(args)...) { numFeatureInputs_ = (InputSize() - 1) / kNumTensorsPerInput; } virtual ~MergeSingleListOrMapFeatureTensorsGradientOp() noexcept {} bool RunOnDevice() override { return DispatchHelper< TensorTypes>:: call(this, Input(InputSize() - 1)); } template bool DoRunWithType() { int numExamples = Input(0).numel(); std::vector outValuesOffset(numFeatureInputs_); for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) { int inputNumValues = 0; const int32_t* inLengthsData = Input(kNumTensorsPerInput * inputIndex).template data(); const bool* inPresenceData = Input(kNumTensorsPerInput * inputIndex + 1).template data(); for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) { if (inPresenceData[exampleIndex]) { inputNumValues += inLengthsData[exampleIndex]; } } Output(inputIndex)->Resize(inputNumValues); } const auto& inValuesValuesGrad = Input(InputSize() - 1); const T* inValuesValuesGradData = inValuesValuesGrad.template data(); int inValuesValuesOffset = 0; for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) { for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) { const int32_t* inLengthsData = Input(kNumTensorsPerInput * inputIndex).template 
data(); const bool* inPresenceData = Input(kNumTensorsPerInput * inputIndex + 1).template data(); if (inPresenceData[exampleIndex]) { T* outFeatureValues = Output(inputIndex)->template mutable_data(); context_.CopyItemsSameDevice( inValuesValuesGrad.dtype(), inLengthsData[exampleIndex], &inValuesValuesGradData[inValuesValuesOffset], &outFeatureValues[outValuesOffset[inputIndex]]); outValuesOffset[inputIndex] += inLengthsData[exampleIndex]; inValuesValuesOffset += inLengthsData[exampleIndex]; } } } return true; } private: const int kNumTensorsPerInput = 2; int numFeatureInputs_; }; template class MergeSingleMapFeatureTensorsOp : public Operator { public: USE_OPERATOR_CONTEXT_FUNCTIONS; template explicit MergeSingleMapFeatureTensorsOp(Args&&... args) : Operator(std::forward(args)...) { numInputs_ = InputSize() / kNumTensorsPerInput; inValuesOffset_.resize(numInputs_); featureIDs_ = this->template GetRepeatedArgument("feature_ids"); } virtual ~MergeSingleMapFeatureTensorsOp() noexcept {} bool RunOnDevice() override { return DispatchHelper< TensorTypes>:: call(this, Input(1)); } template bool DoRunWithType() { return DispatchHelper< TensorTypes2, K>::call(this, Input(2)); } template bool DoRunWithType2() { int numExamples = Input(0).numel(); int totalNumFeatures = 0; int totalNumValues = 0; for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) { const int32_t* inLengthsData = Input(kNumTensorsPerInput * inputIndex).template data(); const bool* inPresenceData = Input(kNumTensorsPerInput * inputIndex + 3).template data(); for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) { if (inPresenceData[exampleIndex]) { ++totalNumFeatures; totalNumValues += inLengthsData[exampleIndex]; } } } auto* outLengths = Output(0, {numExamples}, at::dtype()); auto* outKeys = Output(1, {totalNumFeatures}, at::dtype()); auto* outValuesLengths = Output(2, {totalNumFeatures}, at::dtype()); auto* outValuesKeys = Output(3, {totalNumValues}, at::dtype()); auto* 
outValuesValues = Output(4, {totalNumValues}, at::dtype()); int32_t* outLengthsData = outLengths->template mutable_data(); int64_t* outKeysData = outKeys->template mutable_data(); int32_t* outValuesLengthsData = outValuesLengths->template mutable_data(); K* outValuesKeysData = outValuesKeys->template mutable_data(); V* outValuesValuesData = outValuesValues->template mutable_data(); int keysOffset = 0; int valuesOffset = 0; for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) { inValuesOffset_[inputIndex] = 0; } for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) { outLengthsData[exampleIndex] = 0; for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) { const int32_t* inLengthsData = Input(kNumTensorsPerInput * inputIndex).template data(); const auto& inKeys = Input(kNumTensorsPerInput * inputIndex + 1); const auto& inValues = Input(kNumTensorsPerInput * inputIndex + 2); const bool* inPresenceData = Input(kNumTensorsPerInput * inputIndex + 3).template data(); if (inPresenceData[exampleIndex]) { ++outLengthsData[exampleIndex]; outKeysData[keysOffset] = featureIDs_[inputIndex]; outValuesLengthsData[keysOffset] = inLengthsData[exampleIndex]; context_.CopyItemsSameDevice( inKeys.dtype(), inLengthsData[exampleIndex], &inKeys.template data()[inValuesOffset_[inputIndex]], &outValuesKeysData[valuesOffset]); context_.CopyItemsSameDevice( inValues.dtype(), inLengthsData[exampleIndex], &inValues.template data()[inValuesOffset_[inputIndex]], &outValuesValuesData[valuesOffset]); valuesOffset += inLengthsData[exampleIndex]; inValuesOffset_[inputIndex] += inLengthsData[exampleIndex]; ++keysOffset; } } } return true; } private: const int kNumTensorsPerInput = 4; int numInputs_; std::vector inValuesOffset_; std::vector featureIDs_; }; template class MergeMultiScalarFeatureTensorsOp : public Operator { public: USE_OPERATOR_CONTEXT_FUNCTIONS; template explicit MergeMultiScalarFeatureTensorsOp(Args&&... args) : Operator(std::forward(args)...) 
// Next physical line: MergeMultiScalarFeatureTensorsOp body. Output sizes come
// from the summed numel() of the key tensors; each example then copies
// lengths[example] key/value pairs per input while inKeysOffset_ tracks the
// read position of every input. The start of
// MergeMultiScalarFeatureTensorsGradientOp follows.
{ numInputs_ = InputSize() / kNumTensorsPerInput; inKeysOffset_.resize(numInputs_); } virtual ~MergeMultiScalarFeatureTensorsOp() noexcept {} bool RunOnDevice() override { return DispatchHelper< TensorTypes>:: call(this, Input(2)); } template bool DoRunWithType() { int numExamples = Input(0).numel(); int totalNumFeatures = 0; for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) { totalNumFeatures += Input(kNumTensorsPerInput * inputIndex + 1).numel(); } auto* outLengths = Output(0, {numExamples}, at::dtype()); auto* outKeys = Output(1, {totalNumFeatures}, at::dtype()); auto* outValues = Output(2, {totalNumFeatures}, at::dtype()); int32_t* outLengthsData = outLengths->template mutable_data(); int64_t* outKeysData = outKeys->template mutable_data(); T* outValuesData = outValues->template mutable_data(); int outKeysOffset = 0; for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) { inKeysOffset_[inputIndex] = 0; } for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) { outLengthsData[exampleIndex] = 0; for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) { const int32_t* inLengthsData = Input(kNumTensorsPerInput * inputIndex).template data(); const int64_t* inKeysData = Input(kNumTensorsPerInput * inputIndex + 1) .template data(); const T* inValuesData = Input(kNumTensorsPerInput * inputIndex + 2).template data(); outLengthsData[exampleIndex] += inLengthsData[exampleIndex]; for (int featureIndex = 0; featureIndex < inLengthsData[exampleIndex]; ++featureIndex) { outKeysData[outKeysOffset] = inKeysData[inKeysOffset_[inputIndex]]; outValuesData[outKeysOffset] = inValuesData[inKeysOffset_[inputIndex]]; ++outKeysOffset; ++inKeysOffset_[inputIndex]; } } } return true; } private: const int kNumTensorsPerInput = 3; int numInputs_; std::vector inKeysOffset_; }; template class MergeMultiScalarFeatureTensorsGradientOp : public Operator { public: USE_OPERATOR_CONTEXT_FUNCTIONS; template explicit 
MergeMultiScalarFeatureTensorsGradientOp(Args&&... args) : Operator(std::forward(args)...) { numFeatureInputs_ = (InputSize() - 1) / kNumTensorsPerInput; } virtual ~MergeMultiScalarFeatureTensorsGradientOp() noexcept {} bool RunOnDevice() override { return DispatchHelper< TensorTypes>:: call(this, Input(InputSize() - 1)); } template bool DoRunWithType() { int numExamples = Input(0).numel(); std::vector outValuesOffset(numFeatureInputs_); for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) { int inputNumValues = 0; const int32_t* inLengthsData = Input(kNumTensorsPerInput * inputIndex).template data(); for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) { inputNumValues += inLengthsData[exampleIndex]; } Output(inputIndex)->Resize(inputNumValues); } const auto& inValuesGrad = Input(InputSize() - 1); const T* inValuesGradData = inValuesGrad.template data(); int inValuesOffset = 0; for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) { for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) { const int32_t* inLengthsData = Input(kNumTensorsPerInput * inputIndex).template data(); if (inLengthsData[exampleIndex] > 0) { T* outFeatureValues = Output(inputIndex)->template mutable_data(); context_.CopyItemsSameDevice( inValuesGrad.dtype(), inLengthsData[exampleIndex], &inValuesGradData[inValuesOffset], &outFeatureValues[outValuesOffset[inputIndex]]); outValuesOffset[inputIndex] += inLengthsData[exampleIndex]; inValuesOffset += inLengthsData[exampleIndex]; } } } return true; } private: int kNumTensorsPerInput = 1; int numFeatureInputs_; }; template class MergeMultiListFeatureTensorsOp : public Operator { public: USE_OPERATOR_CONTEXT_FUNCTIONS; template explicit MergeMultiListFeatureTensorsOp(Args&&... args) : Operator(std::forward(args)...) 
// Next physical line: MergeMultiListFeatureTensorsOp body up to the start of
// its per-feature copy loop. Two per-input offsets are kept: inKeysOffset_
// (position in keys / values lengths) and inValuesValuesOffset_ (position in
// the flattened values).
{ numInputs_ = InputSize() / kNumTensorsPerInput; inKeysOffset_.resize(numInputs_); inValuesValuesOffset_.resize(numInputs_); } virtual ~MergeMultiListFeatureTensorsOp() noexcept {} bool RunOnDevice() override { return DispatchHelper< TensorTypes>:: call(this, Input(3)); } template bool DoRunWithType() { int numExamples = Input(0).numel(); int totalNumFeatures = 0; int totalNumValues = 0; for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) { totalNumFeatures += Input(kNumTensorsPerInput * inputIndex + 1).numel(); totalNumValues += Input(kNumTensorsPerInput * inputIndex + 3).numel(); } auto* outLengths = Output(0, {numExamples}, at::dtype()); auto* outKeys = Output(1, {totalNumFeatures}, at::dtype()); auto* outValuesLengths = Output(2, {totalNumFeatures}, at::dtype()); auto* outValuesValues = Output(3, {totalNumValues}, at::dtype()); int32_t* outLengthsData = outLengths->template mutable_data(); int64_t* outKeysData = outKeys->template mutable_data(); int32_t* outValuesLengthsData = outValuesLengths->template mutable_data(); T* outValuesValuesData = outValuesValues->template mutable_data(); int outKeysOffset = 0; int outValuesValuesOffset = 0; for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) { inKeysOffset_[inputIndex] = 0; inValuesValuesOffset_[inputIndex] = 0; } for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) { outLengthsData[exampleIndex] = 0; for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) { const int32_t* inLengthsData = Input(kNumTensorsPerInput * inputIndex).template data(); const int64_t* inKeysData = Input(kNumTensorsPerInput * inputIndex + 1) .template data(); const int32_t* inValuesLengthsData = Input(kNumTensorsPerInput * inputIndex + 2) .template data(); const auto& inValuesValues = Input(kNumTensorsPerInput * inputIndex + 3); outLengthsData[exampleIndex] += inLengthsData[exampleIndex]; for (int featureIndex = 0; featureIndex < inLengthsData[exampleIndex]; ++featureIndex) { 
// Next physical line: per-feature copy of MergeMultiListFeatureTensorsOp (key,
// values length, then a values slice of that length), followed by
// MergeMultiMapFeatureTensorsOp up to its output allocation.
outKeysData[outKeysOffset] = inKeysData[inKeysOffset_[inputIndex]]; outValuesLengthsData[outKeysOffset] = inValuesLengthsData[inKeysOffset_[inputIndex]]; context_.CopyItemsSameDevice( inValuesValues.dtype(), inValuesLengthsData[inKeysOffset_[inputIndex]], &inValuesValues .template data()[inValuesValuesOffset_[inputIndex]], &outValuesValuesData[outValuesValuesOffset]); outValuesValuesOffset += inValuesLengthsData[inKeysOffset_[inputIndex]]; inValuesValuesOffset_[inputIndex] += inValuesLengthsData[inKeysOffset_[inputIndex]]; ++outKeysOffset; ++inKeysOffset_[inputIndex]; } } } return true; } private: const int kNumTensorsPerInput = 4; int numInputs_; std::vector inKeysOffset_; std::vector inValuesValuesOffset_; }; template class MergeMultiMapFeatureTensorsOp : public Operator { public: USE_OPERATOR_CONTEXT_FUNCTIONS; template explicit MergeMultiMapFeatureTensorsOp(Args&&... args) : Operator(std::forward(args)...) { numInputs_ = InputSize() / kNumTensorsPerInput; inKeysOffset_.resize(numInputs_); inValuesValuesOffset_.resize(numInputs_); } virtual ~MergeMultiMapFeatureTensorsOp() noexcept {} bool RunOnDevice() override { return DispatchHelper< TensorTypes>:: call(this, Input(3)); } template bool DoRunWithType() { return DispatchHelper< TensorTypes2, K>::call(this, Input(4)); } template bool DoRunWithType2() { int numExamples = Input(0).numel(); int totalNumFeatures = 0; int totalNumValues = 0; for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) { totalNumFeatures += Input(kNumTensorsPerInput * inputIndex + 1).numel(); totalNumValues += Input(kNumTensorsPerInput * inputIndex + 4).numel(); } auto* outLengths = Output(0, {numExamples}, at::dtype()); auto* outKeys = Output(1, {totalNumFeatures}, at::dtype()); auto* outValuesLengths = Output(2, {totalNumFeatures}, at::dtype()); auto* outValuesKeys = Output(3, {totalNumValues}, at::dtype()); auto* outValuesValues = Output(4, {totalNumValues}, at::dtype()); int32_t* outLengthsData = outLengths->template 
mutable_data(); int64_t* outKeysData = outKeys->template mutable_data(); int32_t* outValuesLengthsData = outValuesLengths->template mutable_data(); K* outValuesKeysData = outValuesKeys->template mutable_data(); V* outValuesValuesData = outValuesValues->template mutable_data(); int outKeysOffset = 0; int outValuesValuesOffset = 0; for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) { inKeysOffset_[inputIndex] = 0; inValuesValuesOffset_[inputIndex] = 0; } for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) { outLengthsData[exampleIndex] = 0; for (int inputIndex = 0; inputIndex < numInputs_; ++inputIndex) { const int32_t* inLengthsData = Input(kNumTensorsPerInput * inputIndex).template data(); const int64_t* inKeysData = Input(kNumTensorsPerInput * inputIndex + 1) .template data(); const int32_t* inValuesLengthsData = Input(kNumTensorsPerInput * inputIndex + 2) .template data(); const auto& inValuesKeys = Input(kNumTensorsPerInput * inputIndex + 3); const auto& inValuesValues = Input(kNumTensorsPerInput * inputIndex + 4); outLengthsData[exampleIndex] += inLengthsData[exampleIndex]; for (int featureIndex = 0; featureIndex < inLengthsData[exampleIndex]; ++featureIndex) { outKeysData[outKeysOffset] = inKeysData[inKeysOffset_[inputIndex]]; outValuesLengthsData[outKeysOffset] = inValuesLengthsData[inKeysOffset_[inputIndex]]; context_.CopyItemsSameDevice( inValuesKeys.dtype(), inValuesLengthsData[inKeysOffset_[inputIndex]], &inValuesKeys .template data()[inValuesValuesOffset_[inputIndex]], &outValuesKeysData[outValuesValuesOffset]); context_.CopyItemsSameDevice( inValuesValues.dtype(), inValuesLengthsData[inKeysOffset_[inputIndex]], &inValuesValues .template data()[inValuesValuesOffset_[inputIndex]], &outValuesValuesData[outValuesValuesOffset]); outValuesValuesOffset += inValuesLengthsData[inKeysOffset_[inputIndex]]; inValuesValuesOffset_[inputIndex] += inValuesLengthsData[inKeysOffset_[inputIndex]]; ++outKeysOffset; 
// Next physical line: end of MergeMultiMapFeatureTensorsOp, then
// MergeMultiListOrMapFeatureTensorsGradientOp. In the gradient op,
// outValuesLengthOffset[i] indexes into input i's values-lengths tensor and
// outValuesValuesOffset[i] is the write position in Output(i); both advance
// for every example, even when nothing is copied.
++inKeysOffset_[inputIndex]; } } } return true; } private: const int kNumTensorsPerInput = 5; int numInputs_; std::vector inKeysOffset_; std::vector inValuesValuesOffset_; }; template class MergeMultiListOrMapFeatureTensorsGradientOp : public Operator { public: USE_OPERATOR_CONTEXT_FUNCTIONS; template explicit MergeMultiListOrMapFeatureTensorsGradientOp(Args&&... args) : Operator(std::forward(args)...) { numFeatureInputs_ = (InputSize() - 1) / kNumTensorsPerInput; } virtual ~MergeMultiListOrMapFeatureTensorsGradientOp() noexcept {} bool RunOnDevice() override { return DispatchHelper< TensorTypes>:: call(this, Input(InputSize() - 1)); } template bool DoRunWithType() { int numExamples = Input(0).numel(); std::vector outValuesLengthOffset(numFeatureInputs_); std::vector outValuesValuesOffset(numFeatureInputs_); for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) { int inputNumValues = 0; auto& inValuesLength = Input(kNumTensorsPerInput * inputIndex + 1); const int32_t* inValuesLengthsData = inValuesLength.template data(); for (int valuesIndex = 0; valuesIndex < inValuesLength.numel(); ++valuesIndex) { inputNumValues += inValuesLengthsData[valuesIndex]; } Output(inputIndex)->Resize(inputNumValues); } const auto& inValuesValuesGrad = Input(InputSize() - 1); const T* inValuesValuesGradData = inValuesValuesGrad.template data(); int inValuesValuesOffset = 0; for (int exampleIndex = 0; exampleIndex < numExamples; ++exampleIndex) { for (int inputIndex = 0; inputIndex < numFeatureInputs_; ++inputIndex) { const int32_t* inLengthsData = Input(kNumTensorsPerInput * inputIndex).template data(); const int32_t* inValuesLengthsData = Input(kNumTensorsPerInput * inputIndex + 1) .template data(); int valuesLengthCopy = 0; for (int valuesLengthIndex = 0; valuesLengthIndex < inLengthsData[exampleIndex]; ++valuesLengthIndex) { valuesLengthCopy += inValuesLengthsData [outValuesLengthOffset[inputIndex] + valuesLengthIndex]; } if (valuesLengthCopy > 0) { T* 
outFeatureValues = Output(inputIndex)->template mutable_data(); context_.CopyItemsSameDevice( inValuesValuesGrad.dtype(), valuesLengthCopy, &inValuesValuesGradData[inValuesValuesOffset], &outFeatureValues[outValuesValuesOffset[inputIndex]]); } outValuesLengthOffset[inputIndex] += inLengthsData[exampleIndex]; outValuesValuesOffset[inputIndex] += valuesLengthCopy; inValuesValuesOffset += valuesLengthCopy; } } return true; } private: int kNumTensorsPerInput = 2; int numFeatureInputs_; }; } // namespace caffe2 #endif // CAFFE2_OPERATORS_FEATURE_MAPS_OPS_H_