#ifndef CAFFE2_CORE_TENSOR_H_
|
#define CAFFE2_CORE_TENSOR_H_
|
|
#include "caffe2/core/storage.h"
|
#include "caffe2/core/tensor_impl.h"
|
|
#include <ATen/core/UndefinedTensorImpl.h>
|
#include <c10/util/intrusive_ptr.h>
|
#if !defined(CAFFE2_IS_XPLAT_BUILD)
|
#include "ATen/core/Tensor.h"
|
#endif
|
#include <c10/core/TensorOptions.h>
|
|
#include <ATen/core/grad_mode.h>
|
|
namespace caffe2 {
|
|
using at::UndefinedTensorImpl;
|
|
/**
|
* @brief Tensor class holds a shared pointer to the implementation TensorImpl,
|
* redirects API calls to TensorImpl;
|
* Copying of Tensor results in sharing the same underlying implementation
|
* object
|
*
|
* NB: See TensorImpl for documentation on these methods.
|
*/
|
class CAFFE2_API Tensor final {
|
private:
|
enum Unsafe { IDoWantAliasing };
|
Tensor(const Tensor& other, Unsafe _) : impl_(other.getIntrusivePtr()) {}
|
|
protected:
|
using TensorImplPtr = c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl>;
|
TensorImplPtr impl_;
|
|
void enforce_invariants();
|
|
public:
|
Tensor() : impl_() {}
|
|
// caffe2::Tensor is explicitly marked as moveable-only because before
|
// the refactoring the class used to be a value type and a lot of user code
|
// is written this way. With PyTorch unification, caffe2::Tensor actually
|
// has semantics of a shared_ptr now (via intrusive_ptr). However, to prevent
|
// accidental mistakes when changing legacy code we keep caffe2::Tensor
|
// to have movable semantics.
|
//
|
// If you need to get a pointer to the same Tensor instance (not to be
|
// confused with shared storage), `UnsafeSharedInstance` can be used. It has
|
// the same behavior as `at::Tensor a = b`.
|
Tensor(const Tensor&) = delete;
|
Tensor& operator=(const Tensor&) = delete;
|
Tensor(Tensor&&) = default;
|
Tensor& operator=(Tensor&&) = default;
|
|
operator bool() const {
|
return impl_.defined();
|
}
|
|
TensorImpl* unsafeGetTensorImpl() const {
|
return impl_.get();
|
}
|
|
Tensor UnsafeSharedInstance() const {
|
return Tensor(*this, IDoWantAliasing);
|
}
|
|
/**
|
* @brief Creates a tensor of the given device type.
|
*
|
* Note that the actual data allocation is not going to be carried out until
|
* you resize the tensor and then call mutable_data().
|
*/
|
explicit Tensor(at::Device device)
|
: impl_(c10::make_intrusive<TensorImpl, UndefinedTensorImpl>(
|
Storage::create_legacy(device, TypeMeta()),
|
c10::computeTensorTypeId(at::device(device).layout(at::kStrided))
|
)) {
|
}
|
|
/**
|
* @brief Creates a tensor of the given dimension.
|
*
|
* Note that the actual data allocation is not going to be carried out until
|
* the first time mutable_data() is called.
|
*/
|
explicit Tensor(at::IntArrayRef dims, DeviceType type) : Tensor(type) {
|
// TODO: here, we create a Storage
|
// and immediately discard it in Resize() since
|
// reset_tensor will be true and FreeMemory will be called,
|
// we might want to avoid creating Storage twice?
|
Resize(dims);
|
}
|
|
// we want to preserve index information
|
explicit Tensor(at::IntArrayRef dims, at::Device device): Tensor(device) {
|
Resize(dims);
|
}
|
|
// TODO: remove?
|
explicit Tensor(const vector<int>& dims, DeviceType type)
|
: Tensor(type) {
|
Resize(dims);
|
}
|
|
/**
|
* @brief: Create a Tensor of at::DeviceType `type` and initialize it with
|
* src Tensor
|
*/
|
Tensor(const Tensor& src, DeviceType type)
|
: Tensor(type) {
|
CopyFrom(src);
|
}
|
|
/**
|
* @brief Mutual conversion with at::Tensor
|
*
|
* The tensor will share the same instance (data, strides, sizes, etc) but
|
* a different subset of APIs would be available
|
*/
|
#if !defined(CAFFE2_IS_XPLAT_BUILD)
|
explicit Tensor(at::Tensor tensor)
|
: impl_(std::move(tensor.impl_)) {
|
enforce_invariants();
|
}
|
|
explicit operator at::Tensor() const& {
|
return at::Tensor::wrap_tensor_impl(impl_);
|
}
|
|
explicit operator at::Tensor() && {
|
return at::Tensor::wrap_tensor_impl(std::move(impl_));
|
}
|
#endif
|
|
bool is_same(const Tensor& other) const noexcept {
|
return impl_ == other.impl_;
|
}
|
|
Tensor Clone() const {
|
Tensor x(GetDevice());
|
x.CopyFrom(*this);
|
return x;
|
}
|
|
/**
|
* Clone self as a Tensor that share the same Storage,
|
* that is, both Tensors are views on the same Storage.
|
* If we change the sizes or strides of one Tensor, it
|
* does not affect the other Tensor that it shares Storage
|
* with.
|
* A similar yet different usage is `Tensor x = y;`, this
|
* will make x and y pointing to the same Tensor and resizing
|
* one of them will resize the other as well.
|
*
|
* TODO: Deduplicate this with THTensor_(newWithTensor)
|
* (exposed in ATen as at::alias but not otherwise available)
|
*/
|
Tensor Alias() const {
|
Tensor x(sizes(), GetDevice());
|
if (!dtype_initialized()) {
|
C10_LOG_EVERY_MS(WARNING, 1000) <<
|
"Cloning a tensor that don't have a data type (did you call mutable_data<T> on the tensor?)";
|
}
|
AT_ASSERTM(
|
storage_initialized(),
|
"Cloning a tensor that has no content and has size > 0");
|
// set_storage already sets data_type_ of TensorImpl
|
x.impl_->set_storage(storage());
|
x.impl_->set_storage_offset(impl_->storage_offset());
|
x.impl_->set_sizes_and_strides(sizes(), strides());
|
return x;
|
}
|
|
DeviceType GetDeviceType() const {
|
return impl_->device_type();
|
}
|
|
at::Device GetDevice() const {
|
return impl_.get()->device();
|
}
|
|
/**
|
* @brief Copies the data from a source tensor, with a context provided to
|
* carry out the underlying memcpy operation. This method respects
|
* caffe2_keep_on_shrink.
|
*
|
* After CopyFrom, this function guarantees that the destination tensor will
|
* have the same initialization state and dtype as src. This function
|
* preserves the DeviceType of the source tensor (so, e.g., if you allocate
|
* a tensor on CPU and then CopyFrom a CUDA tensor, that will to a
|
* CUDA-to-CPU transfer).
|
*
|
* 'async' parameter triggers async copy for CUDA tensors
|
*/
|
void CopyFrom(const Tensor& src, bool async = false) {
|
// TODO: only check `!impl_->requires_grad()` after Variable and Tensor are merged
|
AT_ASSERT(!impl_->is_variable() || !(impl_->requires_grad() && at::GradMode::is_enabled()));
|
AT_ASSERTM(
|
src.impl_->is_contiguous(),
|
"Right now only copy of contiguous source Tensor is supported.");
|
AT_ASSERTM(
|
src.impl_->storage_initialized(),
|
"Cannot copy from an uninitialized Tensor");
|
|
if (src.impl_.get() == impl_.get()) {
|
return;
|
}
|
|
// Test if we need to allocate a new storage
|
// Uninitialized storages are guaranteed to be uniquely owned,
|
// so we don't need to swap in dst case.
|
// If the dtype changed, we need to reallocate storage.
|
if (impl_->dtype() != src.impl_->dtype()) {
|
// NB: copy preserves device_type
|
// This storage will get initialized by the mutable_data call below.
|
impl_->set_storage(at::Storage::create_legacy(impl_->device_type(), src.impl_->dtype()));
|
}
|
impl_->Resize(src.impl_->sizes());
|
|
if (impl_->numel() > 0) {
|
if (impl_->dtype().copy()) {
|
AT_ASSERTM(
|
impl_->device_type() == ::at::DeviceType::CPU,
|
"In CopyFrom source and dest tensors must both be CPU for "
|
"non-POD copy, but dest tensor was ",
|
impl_->device_type());
|
AT_ASSERTM(
|
src.impl_->device_type() == ::at::DeviceType::CPU,
|
"In CopyFrom source and dest tensors must both be CPU for "
|
"non-POD copy, but src tensor was ",
|
src.impl_->device_type());
|
impl_->dtype().copy()(src.impl_->data(), impl_->raw_mutable_data(impl_->dtype()), impl_->numel());
|
} else {
|
// The following copy uses the current (thread local) stream for copying
|
// and also takes the GPU id from the device() field passed in.
|
//
|
// TODO: Potentially more enforcements are necessary to avoid accidental
|
// switch to sync copy if the currently set device is wrong.
|
//
|
// Specifically, we might need to switch to a different context device
|
// here explicitly to avoid relying on user synchronizing things
|
// properly.
|
//
|
// note: raw_mutable_data initializes device here
|
void* new_data = impl_->raw_mutable_data(impl_->dtype());
|
at::CopyBytes(
|
impl_->numel() * impl_->itemsize(),
|
src.impl_->data(),
|
src.impl_->device(),
|
new_data,
|
impl_->device(),
|
async);
|
}
|
}
|
}
|
|
/**
|
* @brief Extend the outer-most dimension of this tensor
|
* to dimension of `num`.
|
*/
|
void ExtendTo(int64_t num, float growthPct) const {
|
CAFFE_ENFORCE_GE_WITH_CALLER(impl_->dim(), 1);
|
CAFFE_ENFORCE_GE_WITH_CALLER(growthPct, 0);
|
Extend(num - impl_->size(0), growthPct);
|
}
|
|
void Extend(int64_t num, float growthPct) const {
|
impl_.get()->Extend(num, growthPct);
|
}
|
|
/**
|
* @brief Shrinks the outer-most dimension to given size, keeping the data.
|
*
|
* This method guarantees that no re-allocations are carried out, which means
|
* that the extra capacity after the end of the shrunk tensor is maintained.
|
* Notably, this function does NOT respect caffe2_keep_on_shrink.
|
*/
|
void ShrinkTo(int64_t outer_dim) const {
|
CAFFE_ENFORCE_WITH_CALLER(
|
impl_->is_contiguous(),
|
"Right now ShrinkTo is only supported on contiguous Tensor.");
|
CAFFE_ENFORCE_WITH_CALLER(impl_->dim() >= 1, "Tensor must be at least 1D");
|
CAFFE_ENFORCE_WITH_CALLER(
|
outer_dim <= impl_->size(0),
|
"New outer dimension must be smaller than current.");
|
CAFFE_ENFORCE(
|
impl_->storage().unique(),
|
"Can't call ShrinkTo on shared storage, please call Resize instead.");
|
impl_.get()->set_size(0, outer_dim);
|
}
|
|
template <class T>
|
void ReserveSpace(const T& outer_dim) const {
|
impl_.get()->ReserveSpace(outer_dim);
|
}
|
|
template <typename... Ts>
|
void Resize(Ts... dim_source) const {
|
impl_.get()->Resize(dim_source...);
|
}
|
|
/**
|
* Resize the tensor like the source tensor. Note that this is just a
|
* sugar wrapper that essentially calls Resize(src_tensor.dims()).
|
* This method respects caffe2_keep_on_shrink.
|
*/
|
inline void ResizeLike(const Tensor& src_tensor) const {
|
CAFFE_ENFORCE_WITH_CALLER(
|
src_tensor.is_contiguous(),
|
"Right now ResizeLike is only supported for contiguous Tensor.");
|
if (impl_ != src_tensor.impl_) {
|
impl_.get()->Resize(src_tensor.sizes());
|
}
|
}
|
|
inline void Reshape(const vector<int64_t>& dims) const {
|
impl_.get()->Reshape(dims);
|
}
|
|
inline void Reshape(const vector<int>& dims) const {
|
impl_.get()->Reshape(ToVectorint64_t(dims));
|
}
|
|
inline void FreeMemory() const {
|
impl_.get()->FreeMemory();
|
}
|
|
/**
|
* A utility function to print the debug string for the tensor. Note that this
|
* is very slow since it involves quite some string operations, so do not use
|
* it in your performance-critical code.
|
*/
|
string DebugString() const {
|
std::stringstream ss;
|
ss << "A Tensor of item size " << impl_->storage().itemsize() << " and type "
|
<< impl_->dtype().name() << " and dimension (";
|
for (int d : impl_->sizes()) {
|
ss << d << ",";
|
}
|
ss << ").";
|
return ss.str();
|
}
|
|
// To be deprecated
|
void ShareData(const Tensor& src) const {
|
impl_.get()->ShareData(*src.impl_.get());
|
}
|
|
/**
|
* @brief Shares the data with an externally managed pointer.
|
*
|
* This is similar to ShareData() but the source is a pointer with an advanced
|
* deleter option. In default, no deletion takes place, and one needs to make
|
* sure that the external memory is deallocated only after the tensor finishes
|
* using it. If a Deleter object is passed in, when this tensor is reallocated
|
* or freed, the deleter function is going to be called.
|
*/
|
template <typename T>
|
void ShareExternalPointer(
|
T* src,
|
size_t capacity = 0,
|
MemoryDeleter d = nullptr) const {
|
ShareExternalPointer((void*)src, caffe2::TypeMeta::Make<T>(), capacity, d);
|
}
|
|
template <typename T>
|
void ShareExternalPointer(at::DataPtr&& data_ptr, size_t capacity = 0) const {
|
ShareExternalPointer(std::move(data_ptr), caffe2::TypeMeta::Make<T>(), capacity);
|
}
|
|
void ShareExternalPointer(
|
void* src,
|
const TypeMeta& data_type,
|
size_t capacity = 0,
|
MemoryDeleter d = nullptr) const {
|
CAFFE_ENFORCE_WITH_CALLER(
|
impl_->is_contiguous(),
|
"Right now ShareExternalPointer is only supported for contiguous Tensor.");
|
CAFFE_ENFORCE_WITH_CALLER(
|
data_type.id() != caffe2::TypeIdentifier::uninitialized(),
|
"To share with a raw external pointer you need to pass in an "
|
"initialized data_type(TypeMeta).");
|
impl_.get()->ShareExternalPointer(
|
at::DataPtr(src, src, d, impl_->device_type()), data_type, capacity);
|
}
|
|
void ShareExternalPointer(
|
at::DataPtr&& data_ptr,
|
const TypeMeta& data_type,
|
size_t capacity) {
|
impl_.get()->ShareExternalPointer(std::move(data_ptr), data_type, capacity);
|
}
|
|
const c10::intrusive_ptr<TensorImpl, UndefinedTensorImpl>& getIntrusivePtr()
|
const {
|
return impl_;
|
}
|
|
bool defined() const {
|
return impl_;
|
}
|
|
/**
|
* Returns a raw void* pointer of the underlying storage. mutable_data()
|
* or raw_mutable_data() must have been called prior to this function call.
|
*/
|
inline void* raw_data() const {
|
return impl_->data();
|
}
|
|
template <typename T>
|
inline T* data() const {
|
return impl_.get()->data<T>();
|
}
|
|
inline void* raw_mutable_data(const TypeMeta& meta) const {
|
return impl_.get()->raw_mutable_data(meta);
|
}
|
|
/**
|
* Returns a mutable raw pointer of the underlying storage. This can only be
|
* used when you know for sure that the underlying storage of the tensor is
|
* already created via an earlier raw_mutable_data(meta) call or a
|
* mutable_data<T>() call.
|
*
|
* If the existing data does not match the desired type, it will be deleted
|
* and a new storage will be created.
|
*/
|
inline void* raw_mutable_data() const {
|
const auto& data_type = impl_->dtype();
|
CAFFE_ENFORCE_WITH_CALLER(
|
data_type.id() != caffe2::TypeIdentifier::uninitialized(),
|
"Calling raw_mutable_data() without meta, but the current meta is "
|
"of unknown type.");
|
return raw_mutable_data(data_type);
|
}
|
|
template <typename T>
|
inline T* mutable_data() const {
|
return impl_.get()->mutable_data<T>();
|
}
|
|
/**
|
* Returns the number of dimensions of the data.
|
*/
|
inline int dim() const {
|
return impl_->dim();
|
}
|
|
/**
|
* (To be deprecated) Returns the number of dimensions of the data.
|
*/
|
inline int ndim() const {
|
return impl_->dim();
|
}
|
|
/**
|
* (To be deprecated) Returns the size (i.e. the number of items) of the
|
* tensor.
|
*/
|
inline int64_t size() const {
|
return impl_->numel();
|
}
|
|
/**
|
* Returns the number of items of the tensor.
|
*/
|
inline int64_t numel() const {
|
return impl_->numel();
|
}
|
|
/**
|
* Return the number of bytes each item takes in the tensor.
|
*/
|
inline size_t itemsize() const {
|
return impl_->storage().itemsize();
|
}
|
|
/**
|
* Returns the total number of bytes of the storage.
|
*
|
* This is equivalent to calling size() * itemsize().
|
*/
|
inline size_t nbytes() const {
|
return impl_->numel() * itemsize();
|
}
|
|
inline at::IntArrayRef sizes() const {
|
return impl_.get()->sizes();
|
}
|
|
inline int64_t size_from_dim(int k) const {
|
return size_from_dim_(k, impl_->sizes());
|
}
|
|
inline int64_t size_to_dim(int k) const {
|
return size_to_dim_(k, impl_->sizes());
|
}
|
|
inline int64_t size_between_dim(int k, int l) const {
|
return size_between_dim_(k, l, impl_->sizes());
|
}
|
|
/**
|
* Returns the 'canonical' version of a (usually) user-specified axis,
|
* allowing for negative indexing (e.g., -1 for the last axis).
|
*
|
* @param axis_index the axis index.
|
* If 0 <= index < dim(), return index.
|
* If -ndim <= index <= -1, return (dim() - (-index)),
|
* e.g., the last axis index (dim() - 1) if index == -1,
|
* the second to last if index == -2, etc.
|
* Dies on out of range index.
|
*/
|
inline int canonical_axis_index(int axis_index) const {
|
return canonical_axis_index_(axis_index, impl_->dim());
|
}
|
|
inline int64_t stride(int64_t dim) const {
|
return impl_.get()->stride(dim);
|
}
|
|
inline at::IntArrayRef strides() const {
|
return impl_.get()->strides();
|
}
|
|
inline bool is_contiguous(at::MemoryFormat memory_format=at::MemoryFormat::Contiguous) const {
|
return impl_.get()->is_contiguous(memory_format);
|
}
|
|
/**
|
* Checks if the tensor content is of the given data type.
|
*/
|
template <typename T>
|
inline bool IsType() const {
|
return impl_->storage().IsType<T>();
|
}
|
|
/**
|
* Returns the TypeMeta object associated with the current data type.
|
*/
|
inline const TypeMeta& dtype() const {
|
return impl_->dtype();
|
}
|
|
/**
|
* (To be deprecated) Returns the TypeMeta object associated with the current
|
* data type.
|
*/
|
inline const TypeMeta& meta() const {
|
return impl_->dtype();
|
}
|
|
/**
|
* Returns the i-th dimension of the tensor in int.
|
*
|
* This function returns an int value instead of int64_t, which depending on
|
* the typedef could be int64. If you want int64 dim values, make sure you
|
* call dim() instead.
|
*/
|
inline int dim32(const int i) const {
|
#ifndef NDEBUG
|
CAFFE_ENFORCE_LT_WITH_CALLER(i, static_cast<int>(impl_->dim()), "Exceeding ndim limit");
|
CAFFE_ENFORCE_GE_WITH_CALLER(i, 0, "Cannot have negative dimension index");
|
#endif
|
auto s = impl_->size(i);
|
CAFFE_ENFORCE_LT_WITH_CALLER(s, std::numeric_limits<int>::max());
|
return static_cast<int>(s);
|
}
|
|
inline int64_t size(const int i) const {
|
return impl_->size(i);
|
}
|
|
// To be deprecated
|
inline int64_t dim(const int i) const {
|
return impl_->size(i);
|
}
|
|
const Storage& storage() {
|
return impl_->storage();
|
}
|
|
const Storage& storage() const {
|
return impl_->storage();
|
}
|
|
bool storage_initialized() const {
|
return impl_->storage_initialized();
|
}
|
|
bool dtype_initialized() const {
|
return impl_->dtype_initialized();
|
}
|
};
|
|
/**
|
* Reinitialize a Tensor to given dims and options if necessary, note that
|
* this will not do anything if the
|
* Tensor already has correct size and data type
|
*/
|
CAFFE2_API void ReinitializeTensor(Tensor* t, at::IntArrayRef dims, at::TensorOptions options);
|
|
CAFFE2_API void ReinitializeAndCopyFrom(
|
Tensor* t,
|
at::TensorOptions options,
|
const Tensor& src,
|
bool async = false);
|
|
CAFFE_DECLARE_PREALLOCATED_KNOWN_TYPE(12, Tensor)
|
|
using TensorCPU = Tensor;
|
|
constexpr int k_limit_default_ = 1000;
|
|
// TODO: the following logic can be merged into regular Tensor class methods
|
// after MKLMemory starts to implement Tensor interface
|
|
// Type call registry
|
typedef TypeMeta (*TypeCall)(const void*);
|
TypeCall GetTypeCallFunction(TypeIdentifier id);
|
void RegisterTypeCallFunction(TypeIdentifier id, TypeCall c);
|
|
// Shape call registry
|
typedef vector<int64_t> (*TensorInfoCall)(
|
const void*,
|
size_t* capacity,
|
DeviceOption* device);
|
TensorInfoCall GetTensorInfoFunction(TypeIdentifier id);
|
void RegisterTensorInfoFunction(TypeIdentifier id, TensorInfoCall c);
|
|
// resize helper function
|
void TensorVectorResize(
|
std::vector<Tensor>& tensors,
|
int size,
|
DeviceType type);
|
|
// Tensor factory function
|
CAFFE2_API Tensor empty(at::IntArrayRef dims, at::TensorOptions options);
|
|
/**
|
* @brief Creates a CPU tensor, and fills its contents with the given values.
|
* Values are copied in
|
*/
|
// TODO: can be unified with at::from_blob when Tensor is merged and string
|
// types are supported
|
template <typename T>
|
Tensor TensorCPUFromValues(at::IntArrayRef dims, at::ArrayRef<T> values) {
|
Tensor r = empty(dims, at::device(CPU).dtype<T>());
|
CAFFE_ENFORCE_EQ(values.size(), r.numel());
|
CPUContext context;
|
context.CopyItemsFromCPU(
|
r.dtype(), values.size(), values.data(), r.mutable_data<T>());
|
return r;
|
}
|
|
vector<int64_t>
|
GetTensorInfo(const void* c, size_t* capacity, DeviceOption* device);
|
|
class CAFFE2_API TensorPrinter {
|
public:
|
explicit TensorPrinter(
|
const std::string& tensor_name = "",
|
const std::string& file_name = "",
|
int limit = k_limit_default_);
|
~TensorPrinter();
|
|
template <class T>
|
void Print(const Tensor& tensor);
|
|
void PrintMeta(const Tensor& tensor);
|
|
string MetaStr(const Tensor& tensor);
|
|
private:
|
bool to_file_;
|
int limit_;
|
std::unique_ptr<std::ofstream> log_file_;
|
std::string tensor_name_;
|
};
|
|
template <class T>
|
void TensorPrinter::Print(const Tensor& tensor) {
|
std::stringstream values_stream;
|
// One most likely doesn't want to print int64-number of items for visual
|
// inspection, so we cast down to int here.
|
int total_count = static_cast<int>(std::min(tensor.numel(), int64_t(limit_)));
|
|
const T* tensor_data = tensor.template data<T>();
|
for (int i = 0; i < total_count - 1; ++i) {
|
values_stream << tensor_data[i] << ",";
|
}
|
if (total_count) {
|
// We do not add a comma after the last item.
|
values_stream << tensor_data[total_count - 1];
|
}
|
|
if (to_file_) {
|
(*log_file_) << MetaStr(tensor) << values_stream.str() << std::endl;
|
} else {
|
// Log to console.
|
LOG(INFO) << MetaStr(tensor) << values_stream.str();
|
}
|
}
|
|
} // namespace caffe2
|
#endif // CAFFE2_CORE_TENSOR_H_
|