#ifndef CAFFE2_OPERATORS_GENERATE_PROPOSALS_OP_H_
#define CAFFE2_OPERATORS_GENERATE_PROPOSALS_OP_H_

#include "caffe2/core/export_caffe2_op_to_c10.h"
#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"
#include "caffe2/utils/eigen_utils.h"
#include "caffe2/utils/math.h"

C10_DECLARE_EXPORT_CAFFE2_OP_TO_C10(GenerateProposals);

namespace caffe2 {

namespace utils {

// A sub tensor view
// TODO: Remove???
template <class T>
class ConstTensorView {
 public:
  ConstTensorView(const T* data, const std::vector<int>& dims)
      : data_(data), dims_(dims) {}

  int ndim() const {
    return dims_.size();
  }
  const std::vector<int>& dims() const {
    return dims_;
  }
  int dim(int i) const {
    DCHECK_LE(i, dims_.size());
    return dims_[i];
  }
  const T* data() const {
    return data_;
  }
  size_t size() const {
    return std::accumulate(
        dims_.begin(), dims_.end(), 1, std::multiplies<size_t>());
  }

 private:
  const T* data_ = nullptr;
  std::vector<int> dims_;
};

// Generate a list of bounding box shapes for each pixel based on predefined
//     bounding box shapes 'anchors'.
// anchors: predefined anchors, size(A, 4)
// Return: all_anchors_vec: (H * W, A * 4)
// Need to reshape to (H * W * A, 4) to match the format in python
CAFFE2_API ERMatXf ComputeAllAnchors(
    const TensorCPU& anchors,
    int height,
    int width,
    float feat_stride);

// Like ComputeAllAnchors, but instead of computing anchors for every single
// spatial location, only computes anchors for the already sorted and filtered
// positions after NMS is applied to avoid unnecessary computation.
// `order` is a raveled array of sorted indices in (A, H, W) format.
CAFFE2_API ERArrXXf ComputeSortedAnchors(
    const Eigen::Map<const ERArrXXf>& anchors,
    int height,
    int width,
    float feat_stride,
    const vector<int>& order);

} // namespace utils

// C++ implementation of GenerateProposalsOp
// Generate bounding box proposals for Faster RCNN. The propoasls are generated
//     for a list of images based on image score 'score', bounding box
//     regression result 'deltas' as well as predefined bounding box shapes
//     'anchors'. Greedy non-maximum suppression is applied to generate the
//     final bounding boxes.
// Reference: facebookresearch/Detectron/detectron/ops/generate_proposals.py
template <class Context>
class GenerateProposalsOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  template <class... Args>
  explicit GenerateProposalsOp(Args&&... args)
      : Operator<Context>(std::forward<Args>(args)...),
        spatial_scale_(
            this->template GetSingleArgument<float>("spatial_scale", 1.0 / 16)),
        feat_stride_(1.0 / spatial_scale_),
        rpn_pre_nms_topN_(
            this->template GetSingleArgument<int>("pre_nms_topN", 6000)),
        rpn_post_nms_topN_(
            this->template GetSingleArgument<int>("post_nms_topN", 300)),
        rpn_nms_thresh_(
            this->template GetSingleArgument<float>("nms_thresh", 0.7f)),
        rpn_min_size_(this->template GetSingleArgument<float>("min_size", 16)),
        angle_bound_on_(
            this->template GetSingleArgument<bool>("angle_bound_on", true)),
        angle_bound_lo_(
            this->template GetSingleArgument<int>("angle_bound_lo", -90)),
        angle_bound_hi_(
            this->template GetSingleArgument<int>("angle_bound_hi", 90)),
        clip_angle_thresh_(
            this->template GetSingleArgument<float>("clip_angle_thresh", 1.0)),
        legacy_plus_one_(
            this->template GetSingleArgument<bool>("legacy_plus_one", true)) {}

  ~GenerateProposalsOp() {}

  bool RunOnDevice() override;

  // Generate bounding box proposals for a given image
  // im_info: [height, width, im_scale]
  // all_anchors: (H * W * A, 4)
  // bbox_deltas_tensor: (4 * A, H, W)
  // scores_tensor: (A, H, W)
  // out_boxes: (n, 5)
  // out_probs: n
  void ProposalsForOneImage(
      const Eigen::Array3f& im_info,
      const Eigen::Map<const ERArrXXf>& anchors,
      const utils::ConstTensorView<float>& bbox_deltas_tensor,
      const utils::ConstTensorView<float>& scores_tensor,
      ERArrXXf* out_boxes,
      EArrXf* out_probs) const;

 protected:
  // spatial_scale_ must be declared before feat_stride_
  float spatial_scale_{1.0};
  float feat_stride_{1.0};

  // RPN_PRE_NMS_TOP_N
  int rpn_pre_nms_topN_{6000};
  // RPN_POST_NMS_TOP_N
  int rpn_post_nms_topN_{300};
  // RPN_NMS_THRESH
  float rpn_nms_thresh_{0.7};
  // RPN_MIN_SIZE
  float rpn_min_size_{16};
  // If set, for rotated boxes in RRPN, output angles are normalized to be
  // within [angle_bound_lo, angle_bound_hi].
  bool angle_bound_on_{true};
  int angle_bound_lo_{-90};
  int angle_bound_hi_{90};
  // For RRPN, clip almost horizontal boxes within this threshold of
  // tolerance for backward compatibility. Set to negative value for
  // no clipping.
  float clip_angle_thresh_{1.0};
  // The infamous "+ 1" for box width and height dating back to the DPM days
  bool legacy_plus_one_{true};

  // Scratch space required by the CUDA version
  // CUB buffers
  Tensor dev_cub_sort_buffer_{Context::GetDeviceType()};
  Tensor dev_cub_select_buffer_{Context::GetDeviceType()};
  Tensor dev_image_offset_{Context::GetDeviceType()};
  Tensor dev_conv_layer_indexes_{Context::GetDeviceType()};
  Tensor dev_sorted_conv_layer_indexes_{Context::GetDeviceType()};
  Tensor dev_sorted_scores_{Context::GetDeviceType()};
  Tensor dev_boxes_{Context::GetDeviceType()};
  Tensor dev_boxes_keep_flags_{Context::GetDeviceType()};

  // prenms proposals (raw proposals minus empty boxes)
  Tensor dev_image_prenms_boxes_{Context::GetDeviceType()};
  Tensor dev_image_prenms_scores_{Context::GetDeviceType()};
  Tensor dev_prenms_nboxes_{Context::GetDeviceType()};
  Tensor host_prenms_nboxes_{CPU};

  Tensor dev_image_boxes_keep_list_{Context::GetDeviceType()};

  // Tensors used by NMS
  Tensor dev_nms_mask_{Context::GetDeviceType()};
  Tensor host_nms_mask_{CPU};

  // Buffer for output
  Tensor dev_postnms_rois_{Context::GetDeviceType()};
  Tensor dev_postnms_rois_probs_{Context::GetDeviceType()};
};

} // namespace caffe2

#endif // CAFFE2_OPERATORS_GENERATE_PROPOSALS_OP_H_