#ifndef CAFFE2_IMAGE_IMAGE_INPUT_OP_H_
#define CAFFE2_IMAGE_IMAGE_INPUT_OP_H_

#include <opencv2/opencv.hpp>

#include <algorithm>
#include <iostream>

#include "c10/core/thread_pool.h"
#include "caffe2/core/common.h"
#include "caffe2/core/db.h"
#include "caffe2/image/transform_gpu.h"
#include "caffe2/operators/prefetch_op.h"
#include "caffe2/proto/caffe2_legacy.pb.h"
#include "caffe2/utils/cast.h"
#include "caffe2/utils/math.h"

namespace caffe2 {

class CUDAContext;

template <class Context>
class ImageInputOp final : public PrefetchOperator<Context> {
  // SINGLE_LABEL: single integer label for multi-class classification.
  // MULTI_LABEL_SPARSE: sparse active label indices for multi-label
  //   classification.
  // MULTI_LABEL_DENSE: dense label embedding vector for label embedding
  //   regression.
  // MULTI_LABEL_WEIGHTED_SPARSE: sparse active label indices with per-label
  //   weights for multi-label classification.
  // SINGLE_LABEL_WEIGHTED: single integer label for multi-class
  //   classification with weighted sampling.
  // EMBEDDING_LABEL: an array of floating-point numbers representing a dense
  //   embedding. Useful for model distillation.
  enum LABEL_TYPE {
    SINGLE_LABEL = 0,
    MULTI_LABEL_SPARSE = 1,
    MULTI_LABEL_DENSE = 2,
    MULTI_LABEL_WEIGHTED_SPARSE = 3,
    SINGLE_LABEL_WEIGHTED = 4,
    EMBEDDING_LABEL = 5,
  };
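
  // For reference, a sketch of the TensorProtos layout this op consumes in
  // the (non-Caffe-datum) caffe2 format, as parsed in
  // GetImageAndLabelAndInfoFromDBValue below. Exact contents depend on the
  // writer; this is illustrative:
  //   protos(0): encoded image string (or raw BYTE data)
  //   protos(1): label (INT32 or FLOAT; active indices for the *_SPARSE
  //              types, a dense vector of length num_labels for
  //              MULTI_LABEL_DENSE / EMBEDDING_LABEL)
  //   protos(2): per-label weights, present only for the weighted types
  //   protos(additional_inputs_offset_ onward): additional outputs,
  //              optionally followed by a 4-int bounding box proto.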

  // INCEPTION_STYLE: Random crop with size 8% - 100% of the image area and
  // aspect ratio in [3/4, 4/3]. Reference: GoogLeNet paper.
  enum SCALE_JITTER_TYPE {
    NO_SCALE_JITTER = 0,
    INCEPTION_STYLE = 1
    // TODO(zyan3): ResNet-style random scale jitter
  };

 public:
  using OperatorBase::OutputSize;
  using PrefetchOperator<Context>::context_;
  using PrefetchOperator<Context>::prefetch_thread_;
  explicit ImageInputOp(const OperatorDef& operator_def, Workspace* ws);
  ~ImageInputOp() {
    PrefetchOperator<Context>::Finalize();
  }

  bool Prefetch() override;
  bool CopyPrefetched() override;

 private:
  using BoundingBox = struct {
    bool valid;
    int ymin;
    int xmin;
    int height;
    int width;
  };

  // Structure to store per-image information.
  // This can be modified by the DecodeAnd* functions, so it needs
  // to be privatized per launch.
  using PerImageArg = struct { BoundingBox bounding_params; };

  bool GetImageAndLabelAndInfoFromDBValue(
      const string& value,
      cv::Mat* img,
      PerImageArg& info,
      int item_id,
      std::mt19937* randgen);
  void DecodeAndTransform(
      const std::string& value,
      float* image_data,
      int item_id,
      const int channels,
      std::size_t thread_index);
  void DecodeAndTransposeOnly(
      const std::string& value,
      uint8_t* image_data,
      int item_id,
      const int channels,
      std::size_t thread_index);
  bool ApplyTransformOnGPU(
      const std::vector<std::int64_t>& dims,
      const c10::Device& type);

  unique_ptr<db::DBReader> owned_reader_;
  const db::DBReader* reader_;
  Tensor prefetched_image_;
  Tensor prefetched_label_;
  vector<Tensor> prefetched_additional_outputs_;
  Tensor prefetched_image_on_device_;
  Tensor prefetched_label_on_device_;
  vector<Tensor> prefetched_additional_outputs_on_device_;
  // Default parameters for images
  PerImageArg default_arg_;
  int batch_size_;
  LABEL_TYPE label_type_;
  int num_labels_;

  bool color_;
  bool color_jitter_;
  float img_saturation_;
  float img_brightness_;
  float img_contrast_;
  bool color_lighting_;
  float color_lighting_std_;
  std::vector<std::vector<float>> color_lighting_eigvecs_;
  std::vector<float> color_lighting_eigvals_;
  SCALE_JITTER_TYPE scale_jitter_type_;
  int scale_;
  // minsize_ is similar to scale_ except that it only forces the image to
  // scale up if it is too small. In other words, it ensures that both
  // dimensions of the image are at least minsize_.
  int minsize_;
  bool warp_;
  int crop_;
  std::vector<float> mean_;
  std::vector<float> std_;
  Tensor mean_gpu_;
  Tensor std_gpu_;
  bool mirror_;
  bool is_test_;
  bool use_caffe_datum_;
  bool gpu_transform_;
  bool mean_std_copied_ = false;

  // thread pool for parse + decode
  int num_decode_threads_;
  int additional_inputs_offset_;
  int additional_inputs_count_;
  std::vector<int> additional_output_sizes_;
  std::shared_ptr<TaskThreadPool> thread_pool_;

  // Output type for GPU transform path
  TensorProto_DataType output_type_;

  // random minsize
  vector<int> random_scale_;
  bool random_scaling_;

  // Working variables
  std::vector<std::mt19937> randgen_per_thread_;

  // number of exceptions produced by OpenCV while decoding image data
  std::atomic<long> num_decode_errors_in_batch_{0};
  // tolerated ratio of OpenCV decoding exceptions
  float max_decode_error_ratio_;
};
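
// A minimal construction sketch (illustrative only, not part of this
// header's API): assuming the CreateOperatorDef / MakeArgument helpers from
// caffe2/utils/proto_utils.h and a DBReader blob named "reader" already in
// the workspace, an ImageInput operator might be defined as
//
//   OperatorDef def = CreateOperatorDef(
//       "ImageInput",
//       "",
//       std::vector<string>{"reader"},
//       std::vector<string>{"data", "label"},
//       std::vector<Argument>{MakeArgument<int>("batch_size", 32),
//                             MakeArgument<int>("color", 1),
//                             MakeArgument<int>("scale", 256),
//                             MakeArgument<int>("crop", 224),
//                             MakeArgument<int>("is_test", 0)});
//
// The argument names match those parsed in the constructor below.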

template <class Context>
ImageInputOp<Context>::ImageInputOp(
    const OperatorDef& operator_def,
    Workspace* ws)
    : PrefetchOperator<Context>(operator_def, ws),
      reader_(nullptr),
      batch_size_(
          OperatorBase::template GetSingleArgument<int>("batch_size", 0)),
      label_type_(static_cast<LABEL_TYPE>(
          OperatorBase::template GetSingleArgument<int>("label_type", 0))),
      num_labels_(
          OperatorBase::template GetSingleArgument<int>("num_labels", 0)),
      color_(OperatorBase::template GetSingleArgument<int>("color", 1)),
      color_jitter_(
          OperatorBase::template GetSingleArgument<int>("color_jitter", 0)),
      img_saturation_(OperatorBase::template GetSingleArgument<float>(
          "img_saturation",
          0.4)),
      img_brightness_(OperatorBase::template GetSingleArgument<float>(
          "img_brightness",
          0.4)),
      img_contrast_(
          OperatorBase::template GetSingleArgument<float>("img_contrast", 0.4)),
      color_lighting_(
          OperatorBase::template GetSingleArgument<int>("color_lighting", 0)),
      color_lighting_std_(OperatorBase::template GetSingleArgument<float>(
          "color_lighting_std",
          0.1)),
      scale_jitter_type_(static_cast<SCALE_JITTER_TYPE>(
          OperatorBase::template GetSingleArgument<int>(
              "scale_jitter_type",
              0))),
      scale_(OperatorBase::template GetSingleArgument<int>("scale", -1)),
      minsize_(OperatorBase::template GetSingleArgument<int>("minsize", -1)),
      warp_(OperatorBase::template GetSingleArgument<int>("warp", 0)),
      crop_(OperatorBase::template GetSingleArgument<int>("crop", -1)),
      mirror_(OperatorBase::template GetSingleArgument<int>("mirror", 0)),
      is_test_(OperatorBase::template GetSingleArgument<int>(
          OpSchema::Arg_IsTest,
          0)),
      use_caffe_datum_(
          OperatorBase::template GetSingleArgument<int>("use_caffe_datum", 0)),
      gpu_transform_(OperatorBase::template GetSingleArgument<int>(
          "use_gpu_transform",
          0)),
      num_decode_threads_(
          OperatorBase::template GetSingleArgument<int>("decode_threads", 4)),
      additional_output_sizes_(
          OperatorBase::template GetRepeatedArgument<int>("output_sizes", {})),
      thread_pool_(std::make_shared<TaskThreadPool>(num_decode_threads_)),
      // output type only supported with CUDA and use_gpu_transform for now
      output_type_(
          cast::GetCastDataType(ArgumentHelper(operator_def), "output_type")),
      random_scale_(OperatorBase::template GetRepeatedArgument<int>(
          "random_scale",
          {-1, -1})),
      max_decode_error_ratio_(OperatorBase::template GetSingleArgument<float>(
          "max_decode_error_ratio",
          1.0)) {
  if ((random_scale_[0] == -1) || (random_scale_[1] == -1)) {
    random_scaling_ = false;
  } else {
    random_scaling_ = true;
    minsize_ = random_scale_[0];
  }

  mean_ = OperatorBase::template GetRepeatedArgument<float>(
      "mean_per_channel",
      {OperatorBase::template GetSingleArgument<float>("mean", 0.)});

  std_ = OperatorBase::template GetRepeatedArgument<float>(
      "std_per_channel",
      {OperatorBase::template GetSingleArgument<float>("std", 1.)});

  if (additional_output_sizes_.size() == 0) {
    additional_output_sizes_ = std::vector<int>(OutputSize() - 2, 1);
  } else {
    CAFFE_ENFORCE(
        additional_output_sizes_.size() == OutputSize() - 2,
        "If the output sizes are specified, they must be specified for all "
        "additional outputs");
  }
  additional_inputs_count_ = OutputSize() - 2;

  default_arg_.bounding_params = {
      false,
      OperatorBase::template GetSingleArgument<int>("bounding_ymin", -1),
      OperatorBase::template GetSingleArgument<int>("bounding_xmin", -1),
      OperatorBase::template GetSingleArgument<int>("bounding_height", -1),
      OperatorBase::template GetSingleArgument<int>("bounding_width", -1),
  };

  if (operator_def.input_size() == 0) {
    LOG(ERROR) << "You are using an old ImageInputOp format that creates "
                  "a local db reader. Consider moving to the new style "
                  "that takes in a DBReader blob instead.";
    string db_name = OperatorBase::template GetSingleArgument<string>("db", "");
    CAFFE_ENFORCE_GT(db_name.size(), 0, "Must specify a db name.");
    owned_reader_.reset(new db::DBReader(
        OperatorBase::template GetSingleArgument<string>("db_type", "leveldb"),
        db_name));
    reader_ = owned_reader_.get();
  }

  // hard-coded PCA eigenvectors and eigenvalues, based on RGB channel order
  color_lighting_eigvecs_.push_back(
      std::vector<float>{-144.7125f, 183.396f, 102.2295f});
  color_lighting_eigvecs_.push_back(
      std::vector<float>{-148.104f, -1.1475f, -207.57f});
  color_lighting_eigvecs_.push_back(
      std::vector<float>{-148.818f, -177.174f, 107.1765f});

  color_lighting_eigvals_ = std::vector<float>{0.2175f, 0.0188f, 0.0045f};

  CAFFE_ENFORCE_GT(batch_size_, 0, "Batch size must be positive.");
  if (use_caffe_datum_) {
    CAFFE_ENFORCE(
        label_type_ == SINGLE_LABEL || label_type_ == SINGLE_LABEL_WEIGHTED,
        "Caffe datum only supports a single integer label");
  }
  if (label_type_ != SINGLE_LABEL && label_type_ != SINGLE_LABEL_WEIGHTED) {
    CAFFE_ENFORCE_GT(
        num_labels_,
        0,
        "Number of labels must be set for using either sparse label indices or dense label embedding.");
  }
  if (label_type_ == MULTI_LABEL_WEIGHTED_SPARSE ||
      label_type_ == SINGLE_LABEL_WEIGHTED) {
    additional_inputs_offset_ = 3;
  } else {
    additional_inputs_offset_ = 2;
  }
  CAFFE_ENFORCE(
      (scale_ > 0) != (minsize_ > 0),
      "Must provide one and only one of scaling or minsize");
  CAFFE_ENFORCE_GT(crop_, 0, "Must provide the cropping value.");
  CAFFE_ENFORCE_GE(
      scale_ > 0 ? scale_ : minsize_,
      crop_,
      "The scale/minsize value must be no smaller than the crop value.");

  CAFFE_ENFORCE_EQ(
      mean_.size(),
      std_.size(),
      "The mean and std. dev vectors must be of the same size.");
  CAFFE_ENFORCE(
      mean_.size() == 1 || mean_.size() == 3,
      "The mean and std. dev vectors must be of size 1 or 3");
  CAFFE_ENFORCE(
      !use_caffe_datum_ || OutputSize() == 2,
      "There can only be 2 outputs if the Caffe datum format is used");

  CAFFE_ENFORCE(
      random_scale_.size() == 2, "Must provide [scale_min, scale_max]");
  CAFFE_ENFORCE_GE(
      random_scale_[1],
      random_scale_[0],
      "random_scale must provide a range [min, max]");

  if (default_arg_.bounding_params.ymin < 0 ||
      default_arg_.bounding_params.xmin < 0 ||
      default_arg_.bounding_params.height < 0 ||
      default_arg_.bounding_params.width < 0) {
    default_arg_.bounding_params.valid = false;
  } else {
    default_arg_.bounding_params.valid = true;
  }

  if (mean_.size() == 1) {
    // We are going to extend to 3 channels using the first value
    mean_.resize(3, mean_[0]);
    std_.resize(3, std_[0]);
  }

  LOG(INFO) << "Creating an image input op with the following settings: ";
  LOG(INFO) << " Using " << num_decode_threads_ << " CPU threads;";
  if (gpu_transform_) {
    LOG(INFO) << " Performing transformation on GPU";
  }
  LOG(INFO) << " Outputting in batches of " << batch_size_ << " images;";
  LOG(INFO) << " Treating input image as "
            << (color_ ? "color " : "grayscale ") << "image;";
  if (default_arg_.bounding_params.valid) {
    LOG(INFO) << " Applying a default bounding box of Y ["
              << default_arg_.bounding_params.ymin << "; "
              << default_arg_.bounding_params.ymin +
                  default_arg_.bounding_params.height
              << ") x X [" << default_arg_.bounding_params.xmin << "; "
              << default_arg_.bounding_params.xmin +
                  default_arg_.bounding_params.width
              << ")";
  }
  if (scale_ > 0 && !random_scaling_) {
    LOG(INFO) << " Scaling image to " << scale_
              << (warp_ ? " with " : " without ") << "warping;";
  } else {
    if (random_scaling_) {
      // randomly set minsize_ for each image
      LOG(INFO) << " Randomly scaling shortest side between "
                << random_scale_[0] << " and " << random_scale_[1];
    } else {
      // Here, minsize_ > 0
      LOG(INFO) << " Ensuring minimum image size of " << minsize_
                << (warp_ ? " with " : " without ") << "warping;";
    }
  }
  LOG(INFO) << " " << (is_test_ ? "Central" : "Random")
            << " cropping image to " << crop_
            << (mirror_ ? " with " : " without ") << "random mirroring;";
  LOG(INFO) << "Label Type: " << label_type_;
  LOG(INFO) << "Num Labels: " << num_labels_;

  auto mit = mean_.begin();
  auto sit = std_.begin();

  for (int i = 0; mit != mean_.end() && sit != std_.end(); ++mit, ++sit, ++i) {
    LOG(INFO) << " Default [Channel " << i << "] Subtract mean " << *mit
              << " and divide by std " << *sit << ".";
    // We actually use the inverse of std, so invert it here
    *sit = 1.f / *sit;
  }
  LOG(INFO) << " Outputting images as "
            << OperatorBase::template GetSingleArgument<string>(
                   "output_type", "unknown")
            << ".";

  std::mt19937 meta_randgen(time(nullptr));
  for (int i = 0; i < num_decode_threads_; ++i) {
    randgen_per_thread_.emplace_back(meta_randgen());
  }
  ReinitializeTensor(
      &prefetched_image_,
      {int64_t(batch_size_),
       int64_t(crop_),
       int64_t(crop_),
       int64_t(color_ ? 3 : 1)},
      at::dtype<uint8_t>().device(CPU));
  std::vector<int64_t> sizes;
  if (label_type_ != SINGLE_LABEL && label_type_ != SINGLE_LABEL_WEIGHTED) {
    sizes = std::vector<int64_t>{int64_t(batch_size_), int64_t(num_labels_)};
  } else {
    sizes = std::vector<int64_t>{batch_size_};
  }
  // The data type for prefetched_label_ is not actually known here;
  // it is determined from the first datum in Prefetch().
  ReinitializeTensor(&prefetched_label_, sizes, at::dtype<int>().device(CPU));

  for (int i = 0; i < additional_output_sizes_.size(); ++i) {
    prefetched_additional_outputs_on_device_.emplace_back();
    prefetched_additional_outputs_.emplace_back();
  }
}

// Inception-style scale jittering
template <class Context>
bool RandomSizedCropping(cv::Mat* img, const int crop, std::mt19937* randgen) {
  cv::Mat scaled_img;
  bool inception_scale_jitter = false;
  int im_height = img->rows, im_width = img->cols;
  int area = im_height * im_width;
  std::uniform_real_distribution<> area_dis(0.08, 1.0);
  std::uniform_real_distribution<> aspect_ratio_dis(3.0 / 4.0, 4.0 / 3.0);

  cv::Mat cropping;
  for (int i = 0; i < 10; ++i) {
    int target_area = int(ceil(area_dis(*randgen) * area));
    float aspect_ratio = aspect_ratio_dis(*randgen);
    int nh = floor(std::sqrt(((float)target_area / aspect_ratio)));
    int nw = floor(std::sqrt(((float)target_area * aspect_ratio)));
    if (nh >= 1 && nh <= im_height && nw >= 1 && nw <= im_width) {
      int height_offset =
          std::uniform_int_distribution<>(0, im_height - nh)(*randgen);
      int width_offset =
          std::uniform_int_distribution<>(0, im_width - nw)(*randgen);
      cv::Rect ROI(width_offset, height_offset, nw, nh);
      cropping = (*img)(ROI);
      cv::resize(
          cropping, scaled_img, cv::Size(crop, crop), 0, 0, cv::INTER_AREA);
      *img = scaled_img;
      inception_scale_jitter = true;
      break;
    }
  }
  return inception_scale_jitter;
}
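
// A worked instance of the sampling above (numbers illustrative): for a
// 640x480 image, area = 307200. If area_dis draws 0.5 and aspect_ratio_dis
// draws 1.25, then target_area = 153600, nh = floor(sqrt(153600 / 1.25)) =
// 350 and nw = floor(sqrt(153600 * 1.25)) = 438, so nw / nh ~= 1.25 and
// nh * nw ~= target_area. Both fit within 480x640, so a random 438x350
// window is cropped and resized to crop x crop. Up to 10 draws are
// attempted; otherwise the caller falls back to simple scaling and random
// cropping.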

template <class Context>
bool ImageInputOp<Context>::GetImageAndLabelAndInfoFromDBValue(
    const string& value,
    cv::Mat* img,
    PerImageArg& info,
    int item_id,
    std::mt19937* randgen) {
  //
  // We recommend using --caffe2_use_fatal_for_enforce=1 when using
  // ImageInputOp, as this function runs on a worker thread and exceptions
  // thrown by CAFFE_ENFORCE are silently dropped by the thread worker
  // functions.
  //
  cv::Mat src;

  // Use the default information for images
  info = default_arg_;
  if (use_caffe_datum_) {
    // The input is in Caffe datum format.
    CaffeDatum datum;
    CAFFE_ENFORCE(datum.ParseFromString(value));

    prefetched_label_.mutable_data<int>()[item_id] = datum.label();
    if (datum.encoded()) {
      // Encoded image in datum.
      // Count the number of exceptions from OpenCV imdecode.
      try {
        src = cv::imdecode(
            cv::Mat(
                1,
                datum.data().size(),
                CV_8UC1,
                const_cast<char*>(datum.data().data())),
            color_ ? cv::IMREAD_COLOR : cv::IMREAD_GRAYSCALE);
        if (src.rows == 0 || src.cols == 0) {
          num_decode_errors_in_batch_++;
          src = cv::Mat::zeros(cv::Size(224, 224), CV_8UC3);
        }
      } catch (cv::Exception& e) {
        num_decode_errors_in_batch_++;
        src = cv::Mat::zeros(cv::Size(224, 224), CV_8UC3);
      }
    } else {
      // Raw image in datum.
      CAFFE_ENFORCE(datum.channels() == 3 || datum.channels() == 1);

      int src_c = datum.channels();
      src.create(
          datum.height(), datum.width(), (src_c == 3) ? CV_8UC3 : CV_8UC1);

      if (src_c == 1) {
        memcpy(src.ptr<uchar>(0), datum.data().data(), datum.data().size());
      } else {
        // The datum stores data in CHW order; convert to HWC to be more
        // consistent with conventional image storage.
        for (int c = 0; c < 3; ++c) {
          const char* datum_buffer =
              datum.data().data() + datum.height() * datum.width() * c;
          uchar* ptr = src.ptr<uchar>(0) + c;
          for (int h = 0; h < datum.height(); ++h) {
            for (int w = 0; w < datum.width(); ++w) {
              *ptr = *(datum_buffer++);
              ptr += 3;
            }
          }
        }
      }
    }
  } else {
    // The input is in caffe2 format.
    TensorProtos protos;
    CAFFE_ENFORCE(protos.ParseFromString(value));
    const TensorProto& image_proto = protos.protos(0);
    const TensorProto& label_proto = protos.protos(1);
    // Collect the additional-output protos.
    vector<TensorProto> additional_output_protos;
    int start = additional_inputs_offset_;
    int end = start + additional_inputs_count_;
    for (int i = start; i < end; ++i) {
      additional_output_protos.push_back(protos.protos(i));
    }

    if (protos.protos_size() == end + 1) {
      // We have bounding box information
      const TensorProto& bounding_proto = protos.protos(end);
      DCHECK_EQ(bounding_proto.data_type(), TensorProto::INT32);
      DCHECK_EQ(bounding_proto.int32_data_size(), 4);
      info.bounding_params.valid = true;
      info.bounding_params.ymin = bounding_proto.int32_data(0);
      info.bounding_params.xmin = bounding_proto.int32_data(1);
      info.bounding_params.height = bounding_proto.int32_data(2);
      info.bounding_params.width = bounding_proto.int32_data(3);
    }

    if (image_proto.data_type() == TensorProto::STRING) {
      // encoded image string.
      DCHECK_EQ(image_proto.string_data_size(), 1);
      const string& encoded_image_str = image_proto.string_data(0);
      int encoded_size = encoded_image_str.size();
      // We use a cv::Mat to wrap the encoded str so we do not need a copy.
      // Count the number of exceptions from OpenCV imdecode.
      try {
        src = cv::imdecode(
            cv::Mat(
                1,
                &encoded_size,
                CV_8UC1,
                const_cast<char*>(encoded_image_str.data())),
            color_ ? cv::IMREAD_COLOR : cv::IMREAD_GRAYSCALE);
        if (src.rows == 0 || src.cols == 0) {
          num_decode_errors_in_batch_++;
          src = cv::Mat::zeros(cv::Size(224, 224), CV_8UC3);
        }
      } catch (cv::Exception& e) {
        num_decode_errors_in_batch_++;
        src = cv::Mat::zeros(cv::Size(224, 224), CV_8UC3);
      }
    } else if (image_proto.data_type() == TensorProto::BYTE) {
      // raw image content.
      int src_c = (image_proto.dims_size() == 3) ? image_proto.dims(2) : 1;
      CAFFE_ENFORCE(src_c == 3 || src_c == 1);

      src.create(
          image_proto.dims(0),
          image_proto.dims(1),
          (src_c == 3) ? CV_8UC3 : CV_8UC1);
      memcpy(
          src.ptr<uchar>(0),
          image_proto.byte_data().data(),
          image_proto.byte_data().size());
    } else {
      LOG(FATAL) << "Unknown image data type.";
    }

    // TODO: if image decoding was unsuccessful, set label to 0
    if (label_proto.data_type() == TensorProto::FLOAT) {
      if (label_type_ == SINGLE_LABEL || label_type_ == SINGLE_LABEL_WEIGHTED) {
        DCHECK_EQ(label_proto.float_data_size(), 1);
        prefetched_label_.mutable_data<float>()[item_id] =
            label_proto.float_data(0);
      } else if (label_type_ == MULTI_LABEL_SPARSE) {
        float* label_data =
            prefetched_label_.mutable_data<float>() + item_id * num_labels_;
        memset(label_data, 0, sizeof(float) * num_labels_);
        for (int i = 0; i < label_proto.float_data_size(); ++i) {
          label_data[(int)label_proto.float_data(i)] = 1.0;
        }
      } else if (label_type_ == MULTI_LABEL_WEIGHTED_SPARSE) {
        const TensorProto& weight_proto = protos.protos(2);
        float* label_data =
            prefetched_label_.mutable_data<float>() + item_id * num_labels_;
        memset(label_data, 0, sizeof(float) * num_labels_);
        for (int i = 0; i < label_proto.float_data_size(); ++i) {
          label_data[(int)label_proto.float_data(i)] =
              weight_proto.float_data(i);
        }
      } else if (
          label_type_ == MULTI_LABEL_DENSE || label_type_ == EMBEDDING_LABEL) {
        CAFFE_ENFORCE(label_proto.float_data_size() == num_labels_);
        float* label_data =
            prefetched_label_.mutable_data<float>() + item_id * num_labels_;
        for (int i = 0; i < label_proto.float_data_size(); ++i) {
          label_data[i] = label_proto.float_data(i);
        }
      } else {
        LOG(ERROR) << "Unknown label type: " << label_type_;
      }
    } else if (label_proto.data_type() == TensorProto::INT32) {
      if (label_type_ == SINGLE_LABEL || label_type_ == SINGLE_LABEL_WEIGHTED) {
        DCHECK_EQ(label_proto.int32_data_size(), 1);
        prefetched_label_.mutable_data<int>()[item_id] =
            label_proto.int32_data(0);
      } else if (label_type_ == MULTI_LABEL_SPARSE) {
        int* label_data =
            prefetched_label_.mutable_data<int>() + item_id * num_labels_;
        memset(label_data, 0, sizeof(int) * num_labels_);
        for (int i = 0; i < label_proto.int32_data_size(); ++i) {
          label_data[label_proto.int32_data(i)] = 1;
        }
      } else if (label_type_ == MULTI_LABEL_WEIGHTED_SPARSE) {
        const TensorProto& weight_proto = protos.protos(2);
        float* label_data =
            prefetched_label_.mutable_data<float>() + item_id * num_labels_;
        memset(label_data, 0, sizeof(float) * num_labels_);
        for (int i = 0; i < label_proto.int32_data_size(); ++i) {
          label_data[label_proto.int32_data(i)] = weight_proto.float_data(i);
        }
      } else if (
          label_type_ == MULTI_LABEL_DENSE || label_type_ == EMBEDDING_LABEL) {
        CAFFE_ENFORCE(label_proto.int32_data_size() == num_labels_);
        int* label_data =
            prefetched_label_.mutable_data<int>() + item_id * num_labels_;
        for (int i = 0; i < label_proto.int32_data_size(); ++i) {
          label_data[i] = label_proto.int32_data(i);
        }
      } else {
        LOG(ERROR) << "Unknown label type: " << label_type_;
      }
    } else {
      LOG(FATAL) << "Unsupported label data type.";
    }

    for (int i = 0; i < additional_output_protos.size(); ++i) {
      auto additional_output_proto = additional_output_protos[i];
      if (additional_output_proto.data_type() == TensorProto::FLOAT) {
        float* additional_output =
            prefetched_additional_outputs_[i].template mutable_data<float>() +
            item_id * additional_output_proto.float_data_size();

        for (int j = 0; j < additional_output_proto.float_data_size(); ++j) {
          additional_output[j] = additional_output_proto.float_data(j);
        }
      } else if (additional_output_proto.data_type() == TensorProto::INT32) {
        int* additional_output =
            prefetched_additional_outputs_[i].template mutable_data<int>() +
            item_id * additional_output_proto.int32_data_size();

        for (int j = 0; j < additional_output_proto.int32_data_size(); ++j) {
          additional_output[j] = additional_output_proto.int32_data(j);
        }
      } else if (additional_output_proto.data_type() == TensorProto::INT64) {
        int64_t* additional_output =
            prefetched_additional_outputs_[i].template mutable_data<int64_t>() +
            item_id * additional_output_proto.int64_data_size();

        for (int j = 0; j < additional_output_proto.int64_data_size(); ++j) {
          additional_output[j] = additional_output_proto.int64_data(j);
        }
      } else if (additional_output_proto.data_type() == TensorProto::UINT8) {
        // Note: UINT8 payloads are carried in the proto's int32_data field.
        uint8_t* additional_output =
            prefetched_additional_outputs_[i].template mutable_data<uint8_t>() +
            item_id * additional_output_proto.int32_data_size();

        for (int j = 0; j < additional_output_proto.int32_data_size(); ++j) {
          additional_output[j] =
              static_cast<uint8_t>(additional_output_proto.int32_data(j));
        }
      } else {
        LOG(FATAL) << "Unsupported output type.";
      }
    }
  }

  //
  // Convert the source to the color format requested by the op.
  //
  int out_c = color_ ? 3 : 1;
  if (out_c == src.channels()) {
    *img = src;
  } else {
    cv::cvtColor(
        src, *img, (out_c == 1) ? cv::COLOR_BGR2GRAY : cv::COLOR_GRAY2BGR);
  }

  // Note(Yangqing): I believe that the mat should be created continuous.
  CAFFE_ENFORCE(img->isContinuous());

  // Sanity check now that we decoded everything

  // Ensure that the bounding box is legit
  if (info.bounding_params.valid &&
      (src.rows < info.bounding_params.ymin + info.bounding_params.height ||
       src.cols < info.bounding_params.xmin + info.bounding_params.width)) {
    info.bounding_params.valid = false;
  }

  // Apply the bounding box if requested
  if (info.bounding_params.valid) {
    // If we reach here, we know the parameters are sane
    cv::Rect bounding_box(
        info.bounding_params.xmin,
        info.bounding_params.ymin,
        info.bounding_params.width,
        info.bounding_params.height);
    *img = (*img)(bounding_box);

    /*
    LOG(INFO) << "Did bounding with ymin:"
              << info.bounding_params.ymin
              << " xmin:" << info.bounding_params.xmin
              << " height:" << info.bounding_params.height
              << " width:" << info.bounding_params.width << "\n";
    LOG(INFO) << "Bounded matrix: " << img;
    */
  } else {
    // LOG(INFO) << "No bounding\n";
  }

  cv::Mat scaled_img;
  bool inception_scale_jitter = false;
  if (scale_jitter_type_ == INCEPTION_STYLE) {
    if (!is_test_) {
      // Inception-style scale jittering is only used for training.
      inception_scale_jitter =
          RandomSizedCropping<Context>(img, crop_, randgen);
      // If a valid random crop is not found, fall back to simple random
      // cropping later.
    }
  }

  if ((scale_jitter_type_ == NO_SCALE_JITTER) ||
      (scale_jitter_type_ == INCEPTION_STYLE && !inception_scale_jitter)) {
    int scaled_width, scaled_height;
    int scale_to_use = scale_ > 0 ? scale_ : minsize_;

    // set the random minsize
    if (random_scaling_) {
      scale_to_use = std::uniform_int_distribution<>(
          random_scale_[0], random_scale_[1])(*randgen);
    }

    if (warp_) {
      scaled_width = scale_to_use;
      scaled_height = scale_to_use;
    } else if (img->rows > img->cols) {
      scaled_width = scale_to_use;
      scaled_height = static_cast<float>(img->rows) * scale_to_use / img->cols;
    } else {
      scaled_height = scale_to_use;
      scaled_width = static_cast<float>(img->cols) * scale_to_use / img->rows;
    }
    if ((scale_ > 0 &&
         (scaled_height != img->rows || scaled_width != img->cols)) ||
        (scaled_height > img->rows || scaled_width > img->cols)) {
      // We rescale in all cases if we are using scale_,
      // but only to make the image bigger if using minsize_.
      /*
      LOG(INFO) << "Scaling to " << scaled_width << " x " << scaled_height
                << " From " << img->cols << " x " << img->rows;
      */
      cv::resize(
          *img,
          scaled_img,
          cv::Size(scaled_width, scaled_height),
          0,
          0,
          cv::INTER_AREA);
      *img = scaled_img;
    }
  }

  // TODO(Yangqing): return false if any error happens.
  return true;
}

// assume HWC order and color channels BGR
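// Saturation blends each pixel toward its grayscale value:
//   out = alpha * pixel + (1 - alpha) * gray, alpha ~ U(1 - alpha_rand, 1 + alpha_rand)
// e.g. alpha = 0 would fully desaturate the image and alpha = 1 leaves it
// unchanged. Brightness and Contrast below follow the same pattern, scaling
// by alpha and blending toward the per-image gray mean, respectively.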
template <class Context>
void Saturation(
    float* img,
    const int img_size,
    const float alpha_rand,
    std::mt19937* randgen) {
  float alpha = 1.0f +
      std::uniform_real_distribution<float>(-alpha_rand, alpha_rand)(*randgen);
  // BGR to grayscale image: R -> 0.299, G -> 0.587, B -> 0.114
  int p = 0;
  for (int h = 0; h < img_size; ++h) {
    for (int w = 0; w < img_size; ++w) {
      float gray_color = img[3 * p] * 0.114f + img[3 * p + 1] * 0.587f +
          img[3 * p + 2] * 0.299f;
      for (int c = 0; c < 3; ++c) {
        img[3 * p + c] = img[3 * p + c] * alpha + gray_color * (1.0f - alpha);
      }
      p++;
    }
  }
}

// assume HWC order and color channels BGR
template <class Context>
void Brightness(
    float* img,
    const int img_size,
    const float alpha_rand,
    std::mt19937* randgen) {
  float alpha = 1.0f +
      std::uniform_real_distribution<float>(-alpha_rand, alpha_rand)(*randgen);
  int p = 0;
  for (int h = 0; h < img_size; ++h) {
    for (int w = 0; w < img_size; ++w) {
      for (int c = 0; c < 3; ++c) {
        img[p++] *= alpha;
      }
    }
  }
}

// assume HWC order and color channels BGR
template <class Context>
void Contrast(
    float* img,
    const int img_size,
    const float alpha_rand,
    std::mt19937* randgen) {
  float gray_mean = 0;
  int p = 0;
  for (int h = 0; h < img_size; ++h) {
    for (int w = 0; w < img_size; ++w) {
      // BGR to grayscale image: R -> 0.299, G -> 0.587, B -> 0.114
      gray_mean += img[3 * p] * 0.114f + img[3 * p + 1] * 0.587f +
          img[3 * p + 2] * 0.299f;
      p++;
    }
  }
  gray_mean /= (img_size * img_size);

  float alpha = 1.0f +
      std::uniform_real_distribution<float>(-alpha_rand, alpha_rand)(*randgen);
  p = 0;
  for (int h = 0; h < img_size; ++h) {
    for (int w = 0; w < img_size; ++w) {
      for (int c = 0; c < 3; ++c) {
        img[p] = img[p] * alpha + gray_mean * (1.0f - alpha);
        p++;
      }
    }
  }
}

// assume HWC order and color channels BGR
template <class Context>
void ColorJitter(
    float* img,
    const int img_size,
    const float saturation,
    const float brightness,
    const float contrast,
    std::mt19937* randgen) {
  std::srand(unsigned(std::time(0)));
  std::vector<int> jitter_order{0, 1, 2};
  // obtain a time-based seed:
  // (Note: std::shuffle below uses its own time-seeded engine; the
  // std::srand call above has no effect on it.)
  unsigned seed = std::chrono::system_clock::now().time_since_epoch().count();
  std::shuffle(
      jitter_order.begin(),
      jitter_order.end(),
      std::default_random_engine(seed));

  for (int i = 0; i < 3; ++i) {
    if (jitter_order[i] == 0) {
      Saturation<Context>(img, img_size, saturation, randgen);
    } else if (jitter_order[i] == 1) {
      Brightness<Context>(img, img_size, brightness, randgen);
    } else {
      Contrast<Context>(img, img_size, contrast, randgen);
    }
  }
}

// assume HWC order and color channels BGR
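// Adds a per-channel shift sampled along the RGB PCA eigenvectors
// (AlexNet-style "fancy PCA" lighting noise):
//   delta_rgb[i] = sum_j eigvecs[i][j] * eigvals[j] * alpha_j, alpha_j ~ N(0, alpha_std)
// The eigenvectors are stored in RGB order while pixels are BGR, hence the
// delta_rgb[2 - c] indexing below.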
template <class Context>
void ColorLighting(
    float* img,
    const int img_size,
    const float alpha_std,
    const std::vector<std::vector<float>>& eigvecs,
    const std::vector<float>& eigvals,
    std::mt19937* randgen) {
  std::normal_distribution<float> d(0, alpha_std);
  std::vector<float> alphas(3);
  for (int i = 0; i < 3; ++i) {
    alphas[i] = d(*randgen);
  }

  std::vector<float> delta_rgb(3, 0.0);
  for (int i = 0; i < 3; ++i) {
    for (int j = 0; j < 3; ++j) {
      delta_rgb[i] += eigvecs[i][j] * eigvals[j] * alphas[j];
    }
  }

  int p = 0;
  for (int h = 0; h < img_size; ++h) {
    for (int w = 0; w < img_size; ++w) {
      for (int c = 0; c < 3; ++c) {
        img[p++] += delta_rgb[2 - c];
      }
    }
  }
}

// assume HWC order and color channels BGR
// mean subtraction and scaling.
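// Note: std is expected to hold reciprocals here (the constructor stores
// 1 / std per channel), so the normalization is a single multiply:
//   out = (pixel - mean[c]) * std[c]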
template <class Context>
void ColorNormalization(
    float* img,
    const int img_size,
    const int channels,
    const std::vector<float>& mean,
    const std::vector<float>& std) {
  int p = 0;
  for (int h = 0; h < img_size; ++h) {
    for (int w = 0; w < img_size; ++w) {
      for (int c = 0; c < channels; ++c) {
        img[p] = (img[p] - mean[c]) * std[c];
        p++;
      }
    }
  }
}

// Factored-out image transformation
template <class Context>
void TransformImage(
    const cv::Mat& scaled_img,
    const int channels,
    float* image_data,
    const bool color_jitter,
    const float saturation,
    const float brightness,
    const float contrast,
    const bool color_lighting,
    const float color_lighting_std,
    const std::vector<std::vector<float>>& color_lighting_eigvecs,
    const std::vector<float>& color_lighting_eigvals,
    const int crop,
    const bool mirror,
    const std::vector<float>& mean,
    const std::vector<float>& std,
    std::mt19937* randgen,
    std::bernoulli_distribution* mirror_this_image,
    bool is_test = false) {
  CAFFE_ENFORCE_GE(
      scaled_img.rows, crop, "Image height must be at least the crop size.");
  CAFFE_ENFORCE_GE(
      scaled_img.cols, crop, "Image width must be at least the crop size.");

  // find the cropped region, and copy it to the destination matrix
  int width_offset, height_offset;
  if (is_test) {
    width_offset = (scaled_img.cols - crop) / 2;
    height_offset = (scaled_img.rows - crop) / 2;
  } else {
    width_offset =
        std::uniform_int_distribution<>(0, scaled_img.cols - crop)(*randgen);
    height_offset =
        std::uniform_int_distribution<>(0, scaled_img.rows - crop)(*randgen);
  }

  float* image_data_ptr = image_data;
  if (!is_test && mirror && (*mirror_this_image)(*randgen)) {
    // Copy mirrored image.
    for (int h = height_offset; h < height_offset + crop; ++h) {
      for (int w = width_offset + crop - 1; w >= width_offset; --w) {
        const uint8_t* cv_data = scaled_img.ptr(h) + w * channels;
        for (int c = 0; c < channels; ++c) {
          *(image_data_ptr++) = static_cast<float>(cv_data[c]);
        }
      }
    }
  } else {
    // Copy normally.
    for (int h = height_offset; h < height_offset + crop; ++h) {
      for (int w = width_offset; w < width_offset + crop; ++w) {
        const uint8_t* cv_data = scaled_img.ptr(h) + w * channels;
        for (int c = 0; c < channels; ++c) {
          *(image_data_ptr++) = static_cast<float>(cv_data[c]);
        }
      }
    }
  }

  if (color_jitter && channels == 3 && !is_test) {
    ColorJitter<Context>(
        image_data, crop, saturation, brightness, contrast, randgen);
  }
  if (color_lighting && channels == 3 && !is_test) {
    ColorLighting<Context>(
        image_data,
        crop,
        color_lighting_std,
        color_lighting_eigvecs,
        color_lighting_eigvals,
        randgen);
  }

  // Color normalization
  // Mean subtraction and scaling.
  ColorNormalization<Context>(image_data, crop, channels, mean, std);
}

// Only crop / transpose the image;
// leave it in uint8_t data type.
template <class Context>
void CropTransposeImage(
    const cv::Mat& scaled_img,
    const int channels,
    uint8_t* cropped_data,
    const int crop,
    const bool mirror,
    std::mt19937* randgen,
    std::bernoulli_distribution* mirror_this_image,
    bool is_test = false) {
  CAFFE_ENFORCE_GE(
      scaled_img.rows, crop, "Image height must be at least the crop size.");
  CAFFE_ENFORCE_GE(
      scaled_img.cols, crop, "Image width must be at least the crop size.");

  // find the cropped region, and copy it to the destination matrix
  int width_offset, height_offset;
  if (is_test) {
    width_offset = (scaled_img.cols - crop) / 2;
    height_offset = (scaled_img.rows - crop) / 2;
  } else {
    width_offset =
        std::uniform_int_distribution<>(0, scaled_img.cols - crop)(*randgen);
    height_offset =
        std::uniform_int_distribution<>(0, scaled_img.rows - crop)(*randgen);
  }

  if (mirror && (*mirror_this_image)(*randgen)) {
    // Copy mirrored image.
    for (int h = height_offset; h < height_offset + crop; ++h) {
      for (int w = width_offset + crop - 1; w >= width_offset; --w) {
        const uint8_t* cv_data = scaled_img.ptr(h) + w * channels;
        for (int c = 0; c < channels; ++c) {
          *(cropped_data++) = cv_data[c];
        }
      }
    }
  } else {
    // Copy normally.
    for (int h = height_offset; h < height_offset + crop; ++h) {
      for (int w = width_offset; w < width_offset + crop; ++w) {
        const uint8_t* cv_data = scaled_img.ptr(h) + w * channels;
        for (int c = 0; c < channels; ++c) {
          *(cropped_data++) = cv_data[c];
        }
      }
    }
  }
}

// Parse the datum, decode the image, and perform the transform.
// Intended as the entry point for binding to the thread pool.
template <class Context>
void ImageInputOp<Context>::DecodeAndTransform(
    const std::string& value,
    float* image_data,
    int item_id,
    const int channels,
    std::size_t thread_index) {
  CAFFE_ENFORCE((int)thread_index < num_decode_threads_);

  std::bernoulli_distribution mirror_this_image(0.5f);
  std::mt19937* randgen = &(randgen_per_thread_[thread_index]);

  cv::Mat img;
  // Decode the image
  PerImageArg info;
  CHECK(
      GetImageAndLabelAndInfoFromDBValue(value, &img, info, item_id, randgen));
  // Factor out the image transformation
  TransformImage<Context>(
      img,
      channels,
      image_data,
      color_jitter_,
      img_saturation_,
      img_brightness_,
      img_contrast_,
      color_lighting_,
      color_lighting_std_,
      color_lighting_eigvecs_,
      color_lighting_eigvals_,
      crop_,
      mirror_,
      mean_,
      std_,
      randgen,
      &mirror_this_image,
      is_test_);
}

template <class Context>
void ImageInputOp<Context>::DecodeAndTransposeOnly(
    const std::string& value,
    uint8_t* image_data,
    int item_id,
    const int channels,
    std::size_t thread_index) {
  CAFFE_ENFORCE((int)thread_index < num_decode_threads_);

  std::bernoulli_distribution mirror_this_image(0.5f);
  std::mt19937* randgen = &(randgen_per_thread_[thread_index]);

  cv::Mat img;
  // Decode the image
  PerImageArg info;
  CHECK(
      GetImageAndLabelAndInfoFromDBValue(value, &img, info, item_id, randgen));

  // Factor out the image transformation
  CropTransposeImage<Context>(
      img,
      channels,
      image_data,
      crop_,
      mirror_,
      randgen,
      &mirror_this_image,
      is_test_);
}

template <class Context>
bool ImageInputOp<Context>::Prefetch() {
  if (!owned_reader_.get()) {
    // If we do not own the reader, we get the reader pointer from the
    // input. Otherwise the constructor has already set the reader pointer.
    reader_ = &OperatorBase::Input<db::DBReader>(0);
  }
  const int channels = color_ ? 3 : 1;
  // Call mutable_data() once to allocate the underlying memory.
  if (gpu_transform_) {
    // we'll transfer up as uint8, then convert later
    prefetched_image_.mutable_data<uint8_t>();
  } else {
    prefetched_image_.mutable_data<float>();
  }

  prefetched_label_.mutable_data<int>();
  // Prefetching is handled by a thread pool of "decode_threads" threads.

  for (int item_id = 0; item_id < batch_size_; ++item_id) {
    std::string key, value;
    cv::Mat img;

    // read data
    reader_->Read(&key, &value);

    // determine the label type based on the first item
    if (item_id == 0) {
      if (use_caffe_datum_) {
        prefetched_label_.mutable_data<int>();
      } else {
        TensorProtos protos;
        CAFFE_ENFORCE(protos.ParseFromString(value));
        TensorProto_DataType labeldt = protos.protos(1).data_type();
        if (labeldt == TensorProto::INT32) {
          prefetched_label_.mutable_data<int>();
        } else if (labeldt == TensorProto::FLOAT) {
          prefetched_label_.mutable_data<float>();
        } else {
          LOG(FATAL) << "Unsupported label type.";
        }

        for (int i = 0; i < additional_inputs_count_; ++i) {
          int index = additional_inputs_offset_ + i;
          TensorProto additional_output_proto = protos.protos(index);
          auto sizes =
              std::vector<int64_t>({batch_size_, additional_output_sizes_[i]});
          if (additional_output_proto.data_type() == TensorProto::FLOAT) {
            prefetched_additional_outputs_[i] =
                caffe2::empty(sizes, at::dtype<float>().device(CPU));
          } else if (
              additional_output_proto.data_type() == TensorProto::INT32) {
            prefetched_additional_outputs_[i] =
                caffe2::empty(sizes, at::dtype<int>().device(CPU));
          } else if (
              additional_output_proto.data_type() == TensorProto::INT64) {
            prefetched_additional_outputs_[i] =
                caffe2::empty(sizes, at::dtype<int64_t>().device(CPU));
          } else if (
              additional_output_proto.data_type() == TensorProto::UINT8) {
            prefetched_additional_outputs_[i] =
                caffe2::empty(sizes, at::dtype<uint8_t>().device(CPU));
          } else {
            LOG(FATAL) << "Unsupported output type.";
          }
        }
      }
    }

    // launch into thread pool for processing
    // TODO: support color jitter and color lighting in gpu_transform
    if (gpu_transform_) {
      // output of decode will still be uint8
      uint8_t* image_data = prefetched_image_.mutable_data<uint8_t>() +
          crop_ * crop_ * channels * item_id;
      thread_pool_->runTaskWithID(std::bind(
          &ImageInputOp<Context>::DecodeAndTransposeOnly,
          this,
          std::string(value),
          image_data,
          item_id,
          channels,
          std::placeholders::_1));
    } else {
      float* image_data = prefetched_image_.mutable_data<float>() +
          crop_ * crop_ * channels * item_id;
      thread_pool_->runTaskWithID(std::bind(
          &ImageInputOp<Context>::DecodeAndTransform,
          this,
          std::string(value),
          image_data,
          item_id,
          channels,
          std::placeholders::_1));
    }
  }
  thread_pool_->waitWorkComplete();

  // We tolerate at most a max_decode_error_ratio fraction of OpenCV
  // imdecode failures per batch before raising a runtime exception.
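  // For example (illustrative numbers): with batch_size_ = 32 and
  // max_decode_error_ratio_ = 0.1, three failed decodes pass
  // (3 / 32 = 0.094), while a fourth (4 / 32 = 0.125) throws.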
  if ((float)num_decode_errors_in_batch_ / batch_size_ >
      max_decode_error_ratio_) {
    throw std::runtime_error(
        "max_decode_error_ratio exceeded " +
        c10::to_string(max_decode_error_ratio_));
  }

  // If the context is not CPUContext, we will need to do a copy in the
  // prefetch function as well.
  auto device = at::device(Context::GetDeviceType());
  if (!std::is_same<Context, CPUContext>::value) {
    // do sync copies
    ReinitializeAndCopyFrom(
        &prefetched_image_on_device_, device, prefetched_image_);
    ReinitializeAndCopyFrom(
        &prefetched_label_on_device_, device, prefetched_label_);

    for (int i = 0; i < prefetched_additional_outputs_on_device_.size(); ++i) {
      ReinitializeAndCopyFrom(
          &prefetched_additional_outputs_on_device_[i],
          device,
          prefetched_additional_outputs_[i]);
    }
  }

  num_decode_errors_in_batch_ = 0;

  return true;
}

template <class Context>
bool ImageInputOp<Context>::CopyPrefetched() {
  auto type = Device(Context::GetDeviceType());
  auto options = at::device(type);

  // Note(jiayq): The if statement below should be optimized away by the
  // compiler since std::is_same is a constexpr.
  if (std::is_same<Context, CPUContext>::value) {
    OperatorBase::OutputTensorCopyFrom(
        0, options, prefetched_image_, /* async */ true);
    OperatorBase::OutputTensorCopyFrom(
        1, options, prefetched_label_, /* async */ true);

    for (int i = 2; i < OutputSize(); ++i) {
      OperatorBase::OutputTensorCopyFrom(
          i, options, prefetched_additional_outputs_[i - 2], /* async */ true);
    }
  } else {
    // TODO: support color jitter and color lighting in gpu_transform
    if (gpu_transform_) {
      if (!mean_std_copied_) {
        ReinitializeTensor(
            &mean_gpu_,
            {static_cast<int64_t>(mean_.size())},
            at::dtype<float>().device(Context::GetDeviceType()));
        ReinitializeTensor(
            &std_gpu_,
            {static_cast<int64_t>(std_.size())},
            at::dtype<float>().device(Context::GetDeviceType()));

        context_.template CopyFromCPU<float>(
            mean_.size(),
            mean_.data(),
            mean_gpu_.template mutable_data<float>());
        context_.template CopyFromCPU<float>(
            std_.size(), std_.data(), std_gpu_.template mutable_data<float>());
        mean_std_copied_ = true;
      }
      const auto& X = prefetched_image_on_device_;
      // data comes in as NHWC
      const int N = X.dim32(0), C = X.dim32(3), H = X.dim32(1), W = X.dim32(2);
      // data goes out as NCHW
      auto dims = std::vector<int64_t>{N, C, H, W};
      if (!ApplyTransformOnGPU(dims, type)) {
        return false;
      }

    } else {
      OperatorBase::OutputTensorCopyFrom(
          0, type, prefetched_image_on_device_, /* async */ true);
    }
    OperatorBase::OutputTensorCopyFrom(
        1, type, prefetched_label_on_device_, /* async */ true);

    for (int i = 2; i < OutputSize(); ++i) {
      OperatorBase::OutputTensorCopyFrom(
          i,
          type,
          prefetched_additional_outputs_on_device_[i - 2],
          /* async */ true);
    }
  }
  return true;
}
} // namespace caffe2

#endif // CAFFE2_IMAGE_IMAGE_INPUT_OP_H_