#include <algorithm>
#include <numeric>
#include <tuple>
#include <utility>
#include <vector>

#include "caffe2/core/tensor.h"
#include "caffe2/utils/eigen_utils.h"
#include "caffe2/utils/math.h"

namespace caffe2 {
namespace {

using t_tuple = std::tuple<Tensor, Tensor>;

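// copy_ctor makes a "copy" suitable for passing values through the helpers
// below. The generic overload copies by value; the Tensor specialization
// returns an alias that shares storage with the original (via
// UnsafeSharedInstance) instead of copying the data; the tuple, pair, and
// vector specializations apply copy_ctor element-wise.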
template <typename T>
T copy_ctor(const T& x) {
  return x;
}

template <>
Tensor copy_ctor(const Tensor& X) {
  return X.UnsafeSharedInstance();
}

template <>
t_tuple copy_ctor(const t_tuple& X) {
  return std::make_tuple(copy_ctor(std::get<0>(X)), copy_ctor(std::get<1>(X)));
}

template <>
std::pair<t_tuple, t_tuple> copy_ctor(const std::pair<t_tuple, t_tuple>& X) {
  return std::make_pair(copy_ctor(X.first), copy_ctor(X.second));
}

template <>
std::vector<Tensor> copy_ctor(const std::vector<Tensor>& X) {
  std::vector<Tensor> Y(X.size());
  std::transform(X.begin(), X.end(), Y.begin(), [](const Tensor& x) {
    return copy_ctor(x);
  });
  return Y;
}

template <>
std::vector<t_tuple> copy_ctor(const std::vector<t_tuple>& X) {
  std::vector<t_tuple> Y(X.size());
  std::transform(X.begin(), X.end(), Y.begin(), [](const t_tuple& x) {
    return copy_ctor(x);
  });
  return Y;
}

template <>
std::vector<std::pair<t_tuple, t_tuple>> copy_ctor(
    const std::vector<std::pair<t_tuple, t_tuple>>& X) {
  std::vector<std::pair<t_tuple, t_tuple>> Y(X.size());
  std::transform(
      X.begin(), X.end(), Y.begin(), [](const std::pair<t_tuple, t_tuple>& x) {
        return copy_ctor(x);
      });
  return Y;
}

// Gathers consecutive elements of a vector into a vector of pairs,
// e.g. {a, b, c, d} -> {(a, b), (c, d)}.
template <typename T>
static std::vector<std::pair<T, T>> pair_vec(const std::vector<T>& vals) {
  CAFFE_ENFORCE_EQ(
      vals.size() % 2,
      0,
      "Odd number of params or hiddens given to a bidirectional RNN");
  std::vector<std::pair<T, T>> result;
  result.reserve(vals.size() / 2);
  for (size_t i = 0; i < vals.size(); i += 2) {
    result.emplace_back(copy_ctor(vals[i]), copy_ctor(vals[i + 1]));
  }
  return result;
}

// Flattens a vector of pairs back into a plain vector, inverting pair_vec.
template <typename T>
static std::vector<T> unpair_vec(std::vector<std::pair<T, T>>&& vals) {
  std::vector<T> result;
  result.reserve(vals.size() * 2);
  for (size_t i = 0; i < vals.size(); i++) {
    result.push_back(std::move(vals[i].first));
    result.push_back(std::move(vals[i].second));
  }
  return result;
}

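// matmul computes C = X * W^T with a single float GEMM. X is treated as a
// 2D matrix of shape (M, K) by flattening at axis 1, W as (N, K); note that
// W is applied transposed (CblasTrans), i.e. fully-connected weight layout,
// so C has shape (M, N).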
Tensor matmul(const Tensor& X, const Tensor& W, CPUContext* context) {
  const auto canonical_axis = X.canonical_axis_index(1);
  const auto M = X.size_to_dim(canonical_axis);
  const auto K = X.size_from_dim(canonical_axis);
  const auto canonical_axis_w = W.canonical_axis_index(1);
  const int N = W.size_to_dim(canonical_axis_w);
  auto output_size = X.sizes().vec();
  output_size.resize(canonical_axis + 1);
  output_size[canonical_axis] = N;
  Tensor C(output_size, CPU);
  math::Gemm<float, CPUContext>(
      CblasNoTrans,
      CblasTrans,
      M,
      N,
      K,
      1,
      X.template data<float>(),
      W.template data<float>(),
      0,
      C.template mutable_data<float>(),
      context);
  return C;
}

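// linear computes X * W^T + B. When a bias tensor B is given (i.e. defined),
// it is broadcast across the M output rows via a rank-1 GEMM against a
// vector of ones.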
Tensor
linear(const Tensor& X, const Tensor& W, const Tensor& B, CPUContext* context) {
  auto output = matmul(X, W, context);
  if (B) {
    const auto canonical_axis = X.canonical_axis_index(1);
    const auto M = X.size_to_dim(canonical_axis);
    const auto canonical_axis_w = W.canonical_axis_index(1);
    const int N = W.size_to_dim(canonical_axis_w);
    auto bias_multiplier_ = caffe2::empty({M}, CPU);
    math::Set<float, CPUContext>(
        M, 1, bias_multiplier_.template mutable_data<float>(), context);
    math::Gemm<float, CPUContext>(
        CblasNoTrans,
        CblasNoTrans,
        M,
        N,
        1,
        1,
        bias_multiplier_.template data<float>(),
        B.template data<float>(),
        1,
        output.template mutable_data<float>(),
        context);
  }
  return output;
}

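// chunk splits `input` into `chunks` equally sized tensors along `axis`;
// the size of that axis must be divisible by `chunks`. For example, a (4, 6)
// tensor chunked into 3 along axis 1 yields three (4, 2) tensors.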
std::vector<Tensor>
chunk(const Tensor& input, int chunks, int axis, CPUContext* context) {
  int canonical_axis = input.canonical_axis_index(axis);
  CAFFE_ENFORCE_LT(
      canonical_axis, input.dim(), "Axis not in input ndim range.");
  const int input_channels = input.dim32(canonical_axis);
  CAFFE_ENFORCE_EQ(
      input_channels % chunks,
      0,
      "input channels should be divisible by the number of chunks.");
  auto split_size = input_channels / chunks;
  vector<int64_t> output_dims(input.sizes().vec());
  int before = 1, after = 1;
  for (int i = 0; i < canonical_axis; ++i) {
    before *= input.dim32(i);
  }
  for (int i = canonical_axis + 1; i < input.dim(); ++i) {
    after *= input.dim32(i);
  }
  size_t input_offset = 0;
  std::vector<Tensor> outputs;
  for (int i = 0; i < chunks; ++i) {
    auto axis_dim = split_size;
    output_dims[canonical_axis] = split_size;
    Tensor output(output_dims, CPU);
    math::CopyMatrix<CPUContext>(
        input.itemsize(),
        before,
        axis_dim * after,
        static_cast<const char*>(input.raw_data()) + input_offset,
        input.dim32(canonical_axis) * after,
        output.raw_mutable_data(input.dtype()),
        axis_dim * after,
        context,
        input.dtype().copy());
    input_offset += axis_dim * after * input.itemsize();
    outputs.push_back(std::move(output));
  }
  return outputs;
}

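// unbind slices `input` along `axis` into dim(axis) tensors and squeezes
// the now-singleton axis out of each slice.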
std::vector<Tensor> unbind(const Tensor& input, int axis, CPUContext* context) {
  // 1 - Chunk the input tensor along the given axis into N chunks where
  // N is the dim(axis)
  auto chunks = chunk(input, input.sizes()[axis], axis, context);
  // 2 - Compute new dimensions
  std::vector<int64_t> newDims = input.sizes().vec();
  newDims.erase(newDims.begin() + axis);

  // 3 - Reshape chunks to drop the extra dimension
  for (size_t i = 0; i < chunks.size(); i++) {
    CAFFE_ENFORCE_EQ(
        chunks[i].sizes()[axis], 1, "Got an unexpected chunk size");
    chunks[i].Reshape(newDims);
  }
  return chunks;
}

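// cat concatenates the given tensors along `axis`. Dtypes must match; the
// non-concatenated dimensions are taken from the first input and are assumed
// to agree across all inputs.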
Tensor
cat(const std::vector<Tensor>& tensorList, int axis, CPUContext* context) {
  // Adapted from C2's concat operator
  auto input_zero = copy_ctor(tensorList.at(0));
  vector<int64_t> outputDims(input_zero.sizes().vec());
  CAFFE_ENFORCE(outputDims.size() > 0);
  for (size_t i = 1; i < tensorList.size(); i++) {
    CAFFE_ENFORCE(input_zero.dtype() == tensorList.at(i).dtype());
    outputDims[axis] += tensorList.at(i).sizes()[axis];
  }
  auto output_channels = outputDims[axis];
  Tensor output(outputDims, CPU);
  int before = 1, after = 1;
  for (int i = 0; i < tensorList.at(0).dim(); ++i) {
    if (i == axis) {
      continue;
    }
    int dim = input_zero.dim32(i);
    if (i < axis) {
      before *= dim;
    } else {
      after *= dim;
    }
  }
  size_t output_offset = 0;
  for (const auto& input : tensorList) {
    auto axis_dim = input.dim32(axis);
    math::CopyMatrix<CPUContext>(
        input.itemsize(),
        before,
        axis_dim * after,
        input.raw_data(),
        axis_dim * after,
        static_cast<char*>(output.raw_mutable_data(input_zero.dtype())) +
            output_offset,
        output_channels * after,
        context,
        input_zero.dtype().copy());
    output_offset += axis_dim * after * input.itemsize();
  }

  return output;
}

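// stack concatenates the tensors along a new axis: each input is cloned,
// reshaped to gain a singleton dimension at `axis`, and passed to cat.
// For example, stacking two (2, 3) tensors at axis 0 yields a (2, 2, 3)
// tensor.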
Tensor
stack(const std::vector<Tensor>& tensorList, int axis, CPUContext* context) {
  // 1 - Compute new dimensions
  std::vector<int64_t> newDims(tensorList[0].sizes().vec());
  std::vector<Tensor> expandedTensorList;
  newDims.insert(newDims.begin() + axis, 1);
  for (size_t i = 0; i < tensorList.size(); i++) {
    expandedTensorList.emplace_back(tensorList[i].Clone());
    expandedTensorList.at(i).Reshape(newDims);
  }
  return cat(expandedTensorList, axis, context);
}

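// Elementwise helpers over float tensors: logistic sigmoid, tanh, addition,
// and multiplication. add and mul assume X and Y hold the same number of
// elements; the output takes X's shape.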
Tensor sigmoid(const Tensor& X) {
  Tensor Y(X.sizes(), CPU);
  auto N = X.numel();
  EigenVectorArrayMap<float>(Y.template mutable_data<float>(), N) = 1.0 /
      (1.0 +
       (-ConstEigenVectorArrayMap<float>(X.template data<float>(), N)).exp());
  return Y;
}

Tensor tanh(const Tensor& X, CPUContext* context) {
  Tensor Y(X.sizes(), CPU);
  math::Tanh<float, CPUContext>(
      X.numel(),
      X.template data<float>(),
      Y.template mutable_data<float>(),
      context);
  return Y;
}

Tensor add(const Tensor& X, const Tensor& Y, CPUContext* context) {
  Tensor Z(X.sizes().vec(), CPU);
  math::Add<float, CPUContext>(
      X.numel(),
      X.template data<float>(),
      Y.template data<float>(),
      Z.template mutable_data<float>(),
      context);
  return Z;
}

Tensor mul(const Tensor& X, const Tensor& Y, CPUContext* context) {
  Tensor Z(X.sizes().vec(), CPU);
  math::Mul<float, CPUContext>(
      X.numel(),
      X.template data<float>(),
      Y.template data<float>(),
      Z.template mutable_data<float>(),
      context);
  return Z;
}

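// transpose swaps dimensions dim0 and dim1 of X, materializing the result
// as a new tensor (a copy, not a view).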
Tensor transpose(const Tensor& X, int dim0, int dim1, CPUContext* context) {
  int ndim = X.dim();
  CAFFE_ENFORCE(ndim > dim0 && ndim > dim1, "Invalid transpose dimensions");
  std::vector<int> axes(ndim);
  std::iota(axes.begin(), axes.end(), 0);
  std::swap(axes[dim0], axes[dim1]);
  const std::vector<std::int64_t> X_dims = X.sizes().vec();
  std::vector<std::int64_t> Y_dims(ndim);
  for (int i = 0; i < ndim; ++i) {
    Y_dims[i] = X_dims[axes[i]];
  }
  Tensor Y(Y_dims, CPU);
  math::Transpose<std::int64_t, float, CPUContext>(
      ndim,
      X_dims.data(),
      axes.data(),
      X.template data<float>(),
      Y.template mutable_data<float>(),
      context);
  return Y;
}
} // namespace
} // namespace caffe2