Home / Class/ slow_conv2d_backward_update_grad_input_frame Class — pytorch Architecture

slow_conv2d_backward_update_grad_input_frame Class — pytorch Architecture

Architecture documentation for the slow_conv2d_backward_update_grad_input_frame function template in ConvolutionMM2d.cpp from the PyTorch codebase.

Entity Profile

Source Code

aten/src/ATen/native/ConvolutionMM2d.cpp lines 286–357

// Backward pass w.r.t. the input for one frame of the slow (GEMM-based)
// 2-d convolution.
//
// Step 1: a single GEMM fills the scratch buffer `fgrad_input` with
//   weight.T * grad_output.reshape({grad_output.shape(0), -1}).
// Step 2: `unfolded2d_acc_stub` is invoked with that buffer, the raw
//   `grad_input` storage and the kernel / stride / padding geometry.
//
// Parameters:
//   grad_input      3-d accessor whose data pointer and three extents are
//                   forwarded to the stub.
//   grad_output     3-d accessor; extents 1 and 2 are used as the output
//                   height and width.
//   weight          2-d accessor over the weight matrix.
//   fgrad_input     scratch buffer overwritten by the GEMM (beta == 0).
//                   NOTE(review): must be large enough for
//                   weight.size(1) * grad_output.size(1) * grad_output.size(2)
//                   elements -- confirm at the call site.
//   kernel_*, stride_*, pad_*
//                   convolution geometry, passed through to the stub unchanged.
//   is_channels_last
//                   selects which GEMM operand layout is used and is also
//                   forwarded to the stub.
template <typename scalar_t>
void slow_conv2d_backward_update_grad_input_frame(
    TensorAccessor<scalar_t, 3> grad_input,
    TensorAccessor<const scalar_t, 3> grad_output,
    TensorAccessor<const scalar_t, 2> weight,
    scalar_t *fgrad_input,
    int64_t kernel_height,
    int64_t kernel_width,
    int64_t stride_height,
    int64_t stride_width,
    int64_t pad_height,
    int64_t pad_width,
    bool is_channels_last) {
  // Extents shared by both layout branches.
  const int64_t output_height = grad_output.size(1);
  const int64_t output_width = grad_output.size(2);
  const int64_t output_pixels = output_height * output_width;
  const int64_t weight_rows = weight.size(0);
  const int64_t weight_cols = weight.size(1);

  // C = alpha * A * B + beta * C with alpha == 1 and beta == 0, i.e. the
  // scratch buffer is overwritten rather than accumulated into.
  const scalar_t alpha = static_cast<scalar_t>(1);
  const scalar_t beta = static_cast<scalar_t>(0);

  // gemm expects fortran (column-major) order, so all three matrices are
  // effectively transposed. Swapping the operand order cancels this out,
  // because C == AB <=> T(C) == T(B)T(A).
  if (is_channels_last) {
    at::native::cpublas::gemm(
        TransposeType::NoTranspose,
        TransposeType::NoTranspose,
        /*m=*/weight_cols,
        /*n=*/output_pixels,
        /*k=*/weight_rows,
        alpha,
        weight.data(), /*lda=*/weight_cols,
        grad_output.data(), /*ldb=*/weight_rows,
        beta,
        fgrad_input, /*ldc=*/weight_cols);
  } else {
    at::native::cpublas::gemm(
        TransposeType::NoTranspose,
        TransposeType::Transpose,
        /*m=*/output_pixels,
        /*n=*/weight_cols,
        /*k=*/weight_rows,
        alpha,
        grad_output.data(), /*lda=*/output_pixels,
        weight.data(), /*ldb=*/weight_cols,
        beta,
        fgrad_input, /*ldc=*/output_pixels);
  }

  // Dispatch to the CPU kernel for this scalar type. Presumably it folds the
  // unfolded columns held in fgrad_input back into grad_input, summing
  // overlapping contributions -- verify against the stub's implementation.
  unfolded2d_acc_stub(
      kCPU,
      c10::CppTypeToScalarType<scalar_t>::value,
      fgrad_input,
      grad_input.data(),
      kernel_height,
      kernel_width,
      stride_height,
      stride_width,
      pad_height,
      pad_width,
      grad_input.size(0),
      grad_input.size(1),
      grad_input.size(2),
      output_height,
      output_width,
      is_channels_last);
}

Analyze Your Own Codebase

Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.

Try Supermodel Free