Home / Class / vkRunner Class — pytorch Architecture

vkRunner Class — pytorch Architecture

Architecture documentation for the vkRunner class in speed_benchmark_torch.cc from the pytorch codebase.

Entity Profile

Relationship Graph

Source Code

binaries/speed_benchmark_torch.cc lines 176–233

// Runner that executes a module through the Vulkan backend.
//
// Unless the module's "requires_backend_transfers" attribute is false, the
// caller-supplied inputs are NOT forwarded as-is: on the first call each
// tensor (or each element of a tensor list) is replaced by a freshly generated
// at::rand tensor of the same sizes, converted with .vulkan(), and cached in
// inputs_. Later calls reuse that cache, so only the shapes seen on the first
// call matter. The (first) output tensor is converted back with .cpu() before
// being returned.
template<class T>
class vkRunner final : public Runner<T> {
 public:
  virtual ~vkRunner() = default;

  // Runs module.forward and returns its result.
  //
  // module: object exposing attr(name, default) and forward(inputs).
  // inputs: IValues that must each be a Tensor or a TensorList when backend
  //         transfers are required; only their sizes are used in that case.
  //
  // Returns the forward() result unchanged when no transfer is required;
  // otherwise the output tensor (or element 0 of a TensorList / List / Tuple
  // output) after .cpu().
  //
  // Throws via CAFFE_THROW on an unsupported input or output kind, or when a
  // list/tuple output is empty.
  virtual c10::IValue run(
      T& module,
      const std::vector<c10::IValue>& inputs) override {
    if (!module.attr("requires_backend_transfers", at::IValue(true)).toBool()) {
      // No need to transfer input/output backends
      return module.forward(inputs);
    }

    if (inputs_.empty()) {
      // Build the Vulkan inputs in a local vector and publish them only once
      // every input has been converted, so that a throw part-way through does
      // not leave a partially filled cache behind for the next call.
      std::vector<c10::IValue> uploaded;
      uploaded.reserve(inputs.size());
      for (const auto& input : inputs) {
        if (input.isTensor()) {
          uploaded.emplace_back(at::rand(input.toTensor().sizes()).vulkan());
        } else if (input.isTensorList()) {
          const c10::List<at::Tensor> input_as_list = input.toTensorList();
          c10::List<at::Tensor> input_vk_list;
          input_vk_list.reserve(input_as_list.size());
          // Unsigned index: size() is unsigned, avoid a signed/unsigned compare.
          for (size_t i = 0; i < input_as_list.size(); ++i) {
            const at::Tensor element = input_as_list.get(i);
            input_vk_list.emplace_back(at::rand(element.sizes()).vulkan());
          }
          uploaded.emplace_back(c10::IValue(input_vk_list));
        } else {
          CAFFE_THROW("Inputs must only contain IValues of type c10::Tensor or c10::TensorList!");
        }
      }
      inputs_.swap(uploaded);
    }

    // Run, and download the output tensor to system memory.
    c10::IValue output = module.forward(inputs_);
    if (output.isTensor()) {
      return output.toTensor().cpu();
    }
    if (output.isTensorList()) {
      const c10::List<at::Tensor> tensors = output.toTensorList();
      if (tensors.empty()) {
        CAFFE_THROW("Output TensorList must not be empty!");
      }
      return tensors.get(0).cpu();
    }
    if (output.isList()) {
      const auto items = output.toList();
      if (items.empty()) {
        CAFFE_THROW("Output List must not be empty!");
      }
      return items.get(0).toTensor().cpu();
    }
    if (output.isTuple()) {
      // Keep the tuple handle alive while its elements are inspected.
      const auto tuple = output.toTuple();
      if (tuple->elements().empty()) {
        CAFFE_THROW("Output Tuple must not be empty!");
      }
      return tuple->elements()[0].toTensor().cpu();
    }
    CAFFE_THROW("Outputs must only be either c10::Tensor or c10::TensorList!");
  }

 private:
  // Vulkan-side inputs generated on the first transferring run() call and
  // reused by every subsequent call.
  std::vector<c10::IValue> inputs_;
};

Analyze Your Own Codebase

Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.

Try Supermodel Free