vkRunner Class — PyTorch Architecture
Architecture documentation for the vkRunner class in speed_benchmark_torch.cc from the PyTorch codebase.
Entity Profile
Relationship Graph
Source Code
binaries/speed_benchmark_torch.cc lines 176–233
template<class T>
class vkRunner final : public Runner<T> {
 public:
  virtual ~vkRunner() = default;

  /// Runs `module` with its inputs resident on the Vulkan (GPU) backend.
  ///
  /// If the module sets `requires_backend_transfers` to false, the caller's
  /// inputs are forwarded unchanged. Otherwise, on the first call a set of
  /// Vulkan-backed tensors matching the shapes of `inputs` is built and
  /// cached in `inputs_`; subsequent calls reuse that cache. The first
  /// output tensor is downloaded back to CPU memory before returning.
  ///
  /// @param module  The scripted/traced module to benchmark.
  /// @param inputs  IValues that must each be a Tensor or a TensorList.
  /// @return The first output tensor, transferred to CPU memory.
  /// @throws If an input is neither Tensor nor TensorList, or the output
  ///         is not one of Tensor/TensorList/List/Tuple.
  virtual c10::IValue run(
      T& module,
      const std::vector<c10::IValue>& inputs) override {
    if (!module.attr("requires_backend_transfers", at::IValue(true)).toBool()) {
      // No need to transfer input/output backends
      return module.forward(inputs);
    }

    if (inputs_.empty()) {
      // Upload the input tensor(s) to GPU memory, once. Note: tensors with
      // the same *shapes* (filled via at::rand) stand in for the actual
      // input values — this runner measures speed, not numerics.
      inputs_.reserve(inputs.size());
      for (const auto& input : inputs) {
        if (input.isTensor()) {
          inputs_.emplace_back(at::rand(input.toTensor().sizes()).vulkan());
        } else if (input.isTensorList()) {
          const c10::List<at::Tensor> input_as_list = input.toTensorList();
          c10::List<at::Tensor> input_vk_list;
          input_vk_list.reserve(input_as_list.size());
          // size_t index avoids the signed/unsigned comparison warning
          // against c10::List::size().
          for (size_t i = 0; i < input_as_list.size(); ++i) {
            const at::Tensor element = input_as_list.get(i);
            input_vk_list.emplace_back(at::rand(element.sizes()).vulkan());
          }
          inputs_.emplace_back(c10::IValue(input_vk_list));
        } else {
          CAFFE_THROW("Inputs must only contain IValues of type c10::Tensor or c10::TensorList!");
        }
      }
    }

    // Run, and download the output tensor to system memory.
    c10::IValue output = module.forward(inputs_);
    if (output.isTensor()) {
      return output.toTensor().cpu();
    }
    if (output.isTensorList()) {
      return output.toTensorList().get(0).cpu();
    }
    if (output.isList()) {
      return output.toList().get(0).toTensor().cpu();
    }
    if (output.isTuple()) {
      return output.toTuple()->elements()[0].toTensor().cpu();
    }
    CAFFE_THROW("Outputs must only be either c10::Tensor or c10::TensorList!");
  }

 private:
  // Cached Vulkan-backed inputs, built lazily on the first run() call.
  std::vector<c10::IValue> inputs_;
};
Domain
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free