#ifndef THC_DEVICE_ALLOCATOR_INC
#define THC_DEVICE_ALLOCATOR_INC

#include <c10/cuda/CUDAStream.h>
#include <c10/core/Allocator.h>
#include <c10/cuda/CUDAMacros.h>
#include <c10/util/Registry.h>

#include <mutex>

namespace c10 {
// The caching allocator will execute every registered callback if it is
// unable to find a block inside the already allocated area.
class C10_CUDA_API FreeMemoryCallback {
 public:
  virtual ~FreeMemoryCallback() {}
  virtual bool Execute() = 0;
};

C10_DECLARE_REGISTRY(FreeCudaMemoryCallbacksRegistry, FreeMemoryCallback);
#define REGISTER_FREE_MEMORY_CALLBACK(name, ...) \
  C10_REGISTER_CLASS(FreeCudaMemoryCallbacksRegistry, name, __VA_ARGS__);
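//
// Illustrative sketch only (the callback name and body below are hypothetical,
// not part of this header): a callback is a subclass whose Execute() tries to
// release memory and reports whether anything was freed, registered through
// the macro above so the caching allocator can invoke it before giving up.
//
//   class MyFreeMemoryCallback : public FreeMemoryCallback {
//    public:
//     bool Execute() override {
//       // Try to release memory held elsewhere; return true only if
//       // something was actually freed, so the allocator retries.
//       return false;
//     }
//   };
//   REGISTER_FREE_MEMORY_CALLBACK(my_free_memory_callback, MyFreeMemoryCallback);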
namespace cuda {

// TODO: Turn this into an honest-to-goodness class. I briefly attempted to do
// this, but it was a bit irritating to figure out how to also correctly
// apply the pimpl pattern so I didn't have to leak any internal implementation
// details in the header (CUDACachingAllocator could be made a pimpl, but
// you also need to appropriately define a class which is a subclass
// of Allocator. Not impossible, but it required a bit more surgery than
// I wanted to do at the time.)
//
// Why is this using a namespace rather than old-style THCCachingAllocator_
// prefix? Mostly because it made the HIPify rules easier to write; _ is
// not counted as a word boundary, so you would otherwise have to list each
// of these functions.

namespace CUDACachingAllocator {

C10_CUDA_API void* raw_alloc(size_t nbytes);
C10_CUDA_API void raw_delete(void* ptr);
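//
// Illustrative sketch only: raw_alloc/raw_delete hand out device memory from
// the cache without going through a DataPtr, so every raw_alloc must be paired
// with a raw_delete on the same pointer.
//
//   void* buf = c10::cuda::CUDACachingAllocator::raw_alloc(4096);
//   // ... launch kernels that use `buf` on the current stream ...
//   c10::cuda::CUDACachingAllocator::raw_delete(buf);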
C10_CUDA_API Allocator* get();
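//
// A minimal usage sketch, shown for orientation only: get() returns the
// caching allocator as a c10::Allocator, so memory obtained through it is
// owned by a DataPtr and goes back to the cache when the DataPtr is destroyed.
//
//   c10::Allocator* allocator = c10::cuda::CUDACachingAllocator::get();
//   c10::DataPtr data = allocator->allocate(1024);  // 1 KiB on the current device
//   // data.get() is the device pointer; it is released when `data` dies.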
C10_CUDA_API void emptyCache();
C10_CUDA_API void cacheInfo(int dev_id, size_t* cachedAndFree, size_t* largestBlock);
C10_CUDA_API void* getBaseAllocation(void* ptr, size_t* size);
C10_CUDA_API void recordStream(void* ptr, CUDAStream stream);
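//
// Illustrative sketch only: recordStream() tells the allocator that `ptr`
// (memory obtained from this allocator) is also used on another stream, so
// its block is not reused until that stream's pending work has completed.
//
//   c10::cuda::CUDAStream side = c10::cuda::getStreamFromPool();
//   // ... enqueue work on `side` that reads or writes `ptr` ...
//   c10::cuda::CUDACachingAllocator::recordStream(ptr, side);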
C10_CUDA_API uint64_t currentMemoryAllocated(int device);
C10_CUDA_API uint64_t maxMemoryAllocated(int device);
C10_CUDA_API void resetMaxMemoryAllocated(int device);
C10_CUDA_API uint64_t currentMemoryCached(int device);
C10_CUDA_API uint64_t maxMemoryCached(int device);
C10_CUDA_API void resetMaxMemoryCached(int device);
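//
// Illustrative sketch only: the *MemoryAllocated counters track bytes handed
// out to callers, the *MemoryCached counters track bytes held by the cache,
// and the reset* functions only clear the peak ("max") values.
//
//   int device = 0;  // device index chosen for the example
//   uint64_t in_use = c10::cuda::CUDACachingAllocator::currentMemoryAllocated(device);
//   uint64_t peak   = c10::cuda::CUDACachingAllocator::maxMemoryAllocated(device);
//   c10::cuda::CUDACachingAllocator::resetMaxMemoryAllocated(device);  // start a new peak window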
C10_CUDA_API std::mutex* getFreeMutex();

C10_CUDA_API std::shared_ptr<void> getIpcDevPtr(std::string handle);
} // namespace CUDACachingAllocator

}} // namespace c10::cuda

#endif