#pragma once

#include <ATen/core/ivalue.h>

// TODO move this to c10 namespace

namespace torch {
namespace jit {

using c10::IValue;
using Stack = std::vector<IValue>;
using Operation = std::function<int(Stack&)>;

// An operation with N inputs and M outputs pops the last N inputs off
// the stack and pushes its M outputs onto the stack
// before: I0, I1, ... IN <- stack.back()
// after: O0, O1, ... OM
// operations are defined this way so that ownership of inputs can be
// transferred to the operation and it can incrementally drop ownership of
// tensors when they become unneeded. For large operations, like 'run an entire
// subgraph', this functionality is very important for minimizing gpu memory
// usage.
// return value is the relative 'offset' to jump to for the next operation:
//   pc += 1 + offset
// so a return value of 0 goes to the next instruction

// treat the last N elements of the stack as a list, looking up
// element i
static inline IValue& peek(Stack& stack, size_t i, size_t N) {
  return *(stack.end() - N + i);
}
static inline const IValue& peek(const Stack& stack, size_t i, size_t N) {
  return *(stack.end() - N + i);
}
// treat the last N elements of the stack as a list, looking up the
// slice starting at index i and having length len
static inline at::ArrayRef<IValue> peekSlice(
    const Stack& stack,
    size_t i,
    size_t len,
    size_t N) {
  return at::ArrayRef<IValue>(stack).slice(stack.size() - N + i, len);
}
static inline at::ArrayRef<IValue> last(const Stack& stack, size_t N) {
  return peekSlice(stack, 0, N, N);
}
static inline void drop(Stack& stack, size_t n) {
  stack.erase(stack.end() - n, stack.end());
}
static inline IValue pop(Stack& stack) {
  auto r = std::move(stack.back());
  stack.pop_back();
  return r;
}
static inline std::vector<IValue> pop(Stack& stack, size_t n) {
  std::vector<IValue> result;
  result.reserve(n);
  for (size_t i = 0; i < n; ++i) {
    result.push_back(std::move(peek(stack, i, n)));
  }
  drop(stack, n);
  return result;
}

// variadic pop:
// int64_t a; at::Tensor b;
// pop(stack, a, b);
// equivalent to:
// b = pop(stack).toTensor();
// a = pop(stack).toInt();
template <typename... Types>
static inline void pop(Stack& stack, Types&... args) {
  size_t i = 0;
  constexpr size_t N = sizeof...(args);
  int result[N] = {
      (args = std::move(peek(stack, i++, N)).template to<Types>(), 0)...};
  (void)result;
  drop(stack, N);
}

template <typename... Types>
static inline void push(Stack& stack, Types&&... args) {
  (void)std::initializer_list<int>{
      (stack.emplace_back(std::forward<Types>(args)), 0)...};
}

template <class T>
static inline void push_list_elements(
    Stack& stack,
    const c10::List<T>& elements) {
  stack.reserve(stack.size() + elements.size());
  for (T elem : elements) {
    stack.push_back(std::move(elem));
  }
}

// The packer here is carefully written not to make any unnecessary
// copies.

// pack takes the return values of aten functions and pushes them onto the
// stack
template <typename T>
inline void pack(Stack& stack, T&& v) {
  stack.emplace_back(std::forward<T>(v));
}

template <std::size_t remaining, typename... Args>
struct TuplePacker {
  // NB: *Not* a universal reference.
  static void execute(Stack& stack, std::tuple<Args...>&& t) {
    // NB: The move here does not "destroy" the entire tuple, that is
    // not what std::move does; only the particular tuple index
    // processed here gets stolen.
    pack(stack, std::get<sizeof...(Args) - remaining>(std::move(t)));
    TuplePacker<remaining - 1, Args...>::execute(stack, std::move(t));
  }
};

template <typename... Args>
struct TuplePacker<0, Args...> {
  static void execute(Stack& stack, std::tuple<Args...>&& t) {}
};

template <typename... Args>
inline void pack(Stack& stack, std::tuple<Args...>&& t) {
  TuplePacker<sizeof...(Args), Args...>::execute(stack, std::move(t));
}

} // namespace jit
} // namespace torch
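
// ----------------------------------------------------------------------------
// Usage sketch (illustrative only): how an Operation is typically written
// against the helpers above -- pop the N inputs, push the M outputs, and
// return the relative jump offset (0 means "fall through to the next
// instruction"). The namespace `example_ops` and the function names below are
// hypothetical and not part of the API declared above.
namespace example_ops {

// Pops two integers from the stack and pushes their sum; a minimal Operation
// body matching the std::function<int(Stack&)> signature.
inline int add_ints_op(torch::jit::Stack& stack) {
  int64_t a = 0;
  int64_t b = 0;
  // Variadic pop assigns in stack order: `a` gets the deeper element,
  // `b` gets the top of the stack.
  torch::jit::pop(stack, a, b);
  torch::jit::push(stack, a + b); // push the single output
  return 0;                       // pc += 1 + 0, i.e. the next instruction
}

// `pack` flattens a tuple of return values onto the stack in order, so the
// last tuple element ends up on top of the stack.
inline void push_pair(torch::jit::Stack& stack, int64_t count, double mean) {
  torch::jit::pack(stack, std::make_tuple(count, mean));
}

} // namespace example_ops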