backward<input_requires_grad> Method — pytorch Architecture
Architecture documentation for the backward method template (parameterized on the bool template argument input_requires_grad) in GridSamplerKernel.cpp from the pytorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/cpu/GridSamplerKernel.cpp lines 594–704
// Backward pass for one vector-width chunk of grid locations.
//
// For the chunk that starts at `offset`, this accumulates the gradient with
// respect to the grid into gGrid_slice (x and y interleaved) and, when
// input_requires_grad is true, scatter-adds the weighted output gradient into
// *gInp_slice_ptr.
//
// Template parameter:
//   input_requires_grad - when false, both input-gradient blocks are skipped
//                         and gInp_slice_ptr is never dereferenced.
// Parameters:
//   gInp_slice_ptr - input-gradient accessor; asserted non-null inside the
//                    channel loop on the input_requires_grad path only.
//   gGrid_slice    - destination of the grid gradient; written starting at
//                    data() + offset * 2.
//   gOut_slice     - output gradient; read per channel at data() + offset.
//   inp_slice      - input values; gathered per channel at the four corners.
//   offset         - element offset of this chunk within gOut / gGrid.
//   grid_x, grid_y - grid coordinates for this chunk.
//   len            - element count forwarded to the partial load, to
//                    mask_scatter_add and to the final stores; presumably the
//                    number of valid lanes (<= Vec::size()) - TODO confirm
//                    against the caller.
template<bool input_requires_grad>
inline void backward(TensorAccessor<scalar_t, 3>* gInp_slice_ptr,
TensorAccessor<scalar_t, 3>& gGrid_slice,
const TensorAccessor<const scalar_t, 3>& gOut_slice,
const TensorAccessor<const scalar_t, 3>& inp_slice,
int64_t offset, const Vec& grid_x, const Vec& grid_y,
int64_t len) const {
// apply_get_grad returns the transformed coordinate together with a
// multiplier that is applied to the accumulated gradient at the very end
// (presumably d(coordinate)/d(grid); the helper is defined elsewhere).
auto [x, gx_mult] = compute_W.apply_get_grad(grid_x);
auto [y, gy_mult] = compute_H.apply_get_grad(grid_y);
// n/s/w/e and nw/ne/sw/se are used below as interpolation weights, the
// *_mask vectors gate which corner lanes are gathered / scattered, and
// i_y_n / i_x_w feed the corner offsets (presumably the integer row / column
// of the north-west corner - see compute_interp_params, defined elsewhere).
auto [
n, s, w, e, nw, ne, sw, se, nw_mask, ne_mask, sw_mask, se_mask,
i_y_n, i_x_w] = compute_interp_params(x, y);
// Offsets of the four corners into inp_slice[c], built from inp_sH / inp_sW
// (presumably the input's H and W strides - TODO confirm).
auto i_nw_offset = i_y_n * iVec(inp_sH) + i_x_w * iVec(inp_sW);
auto i_ne_offset = i_nw_offset + iVec(inp_sW);
auto i_sw_offset = i_nw_offset + iVec(inp_sH);
auto i_se_offset = i_sw_offset + iVec(inp_sW);
// Spill the corner masks into plain arrays: mask_scatter_add below takes
// array arguments rather than vectors.
// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
integer_t i_nw_mask_arr[iVec::size()];
// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
integer_t i_ne_mask_arr[iVec::size()];
// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
integer_t i_sw_mask_arr[iVec::size()];
// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
integer_t i_se_mask_arr[iVec::size()];
nw_mask.store(i_nw_mask_arr);
ne_mask.store(i_ne_mask_arr);
sw_mask.store(i_sw_mask_arr);
se_mask.store(i_se_mask_arr);
// i_gInp_*_offset_arr and gInp_corner_arr variables below are unnecessary
// when input_requires_grad is false (they are only used within the
// if-blocks), but required to make the code well-formed.
// When reading input values, we used mask_gather. Unfortunately, there is
// no mask_scatter_add (the backward of mask_gather) in Intel intrinsics.
// So we store the necessary vectors to temporary arrays and use the helper
// mask_scatter_add defined above.
// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
integer_t i_gInp_nw_offset_arr[iVec::size()];
// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
integer_t i_gInp_ne_offset_arr[iVec::size()];
// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
integer_t i_gInp_sw_offset_arr[iVec::size()];
// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
integer_t i_gInp_se_offset_arr[iVec::size()];
// Offsets into the gInp channel slice are built from inp_W and a unit step
// instead of inp_sH / inp_sW.
// NOTE(review): this looks like it assumes each gInp channel slice is laid
// out contiguously with row length inp_W - confirm against the caller.
if (input_requires_grad) {
auto i_gInp_nw_offset = i_y_n * iVec(inp_W) + i_x_w;
auto i_gInp_ne_offset = i_gInp_nw_offset + iVec(1);
auto i_gInp_sw_offset = i_gInp_nw_offset + iVec(inp_W);
auto i_gInp_se_offset = i_gInp_sw_offset + iVec(1);
i_gInp_nw_offset.store(i_gInp_nw_offset_arr);
i_gInp_ne_offset.store(i_gInp_ne_offset_arr);
i_gInp_sw_offset.store(i_gInp_sw_offset_arr);
i_gInp_se_offset.store(i_gInp_se_offset_arr);
}
// Scratch buffer holding one corner's weighted gradient; it is overwritten
// and reused for each of the four corners in the loop below.
// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
scalar_t gInp_corner_arr[Vec::size()];
// Grid-gradient accumulators, summed over all C channels.
auto gx = Vec(0), gy = Vec(0);
#if !defined(_MSC_VER) && !defined(COMPILING_FOR_MIN_SIZE)
# pragma unroll
#endif
for (const auto c : c10::irange(C)) {
auto inp_slice_C_ptr = inp_slice[c].data();
// Partial load: only `len` elements of this channel's output gradient are
// read starting at `offset`.
auto gOut = Vec::loadu(gOut_slice[c].data() + offset, len);
if (input_requires_grad) {
TORCH_INTERNAL_ASSERT(gInp_slice_ptr);
auto gInp_slice_C_ptr = (*gInp_slice_ptr)[c].data();
// Each corner receives gOut scaled by that corner's weight; the product is
// staged in gInp_corner_arr and handed to mask_scatter_add together with
// the corner's offsets and mask.
(nw * gOut).store(gInp_corner_arr);
mask_scatter_add(gInp_corner_arr, gInp_slice_C_ptr, i_gInp_nw_offset_arr, i_nw_mask_arr, len);
(ne * gOut).store(gInp_corner_arr);
mask_scatter_add(gInp_corner_arr, gInp_slice_C_ptr, i_gInp_ne_offset_arr, i_ne_mask_arr, len);
(sw * gOut).store(gInp_corner_arr);
mask_scatter_add(gInp_corner_arr, gInp_slice_C_ptr, i_gInp_sw_offset_arr, i_sw_mask_arr, len);
(se * gOut).store(gInp_corner_arr);
mask_scatter_add(gInp_corner_arr, gInp_slice_C_ptr, i_gInp_se_offset_arr, i_se_mask_arr, len);
}
// mask_gather zeros out the mask, so we need to make copies
Vec nw_mask_copy = nw_mask;
Vec ne_mask_copy = ne_mask;
Vec sw_mask_copy = sw_mask;
Vec se_mask_copy = se_mask;
// Gather the four corner input values for this channel; Vec(0) is passed as
// the source vector (presumably the value kept for masked-off lanes).
auto nw_val = mask_gather<sizeof(scalar_t)>(Vec(0), inp_slice_C_ptr, i_nw_offset, nw_mask_copy);
auto ne_val = mask_gather<sizeof(scalar_t)>(Vec(0), inp_slice_C_ptr, i_ne_offset, ne_mask_copy);
auto sw_val = mask_gather<sizeof(scalar_t)>(Vec(0), inp_slice_C_ptr, i_sw_offset, sw_mask_copy);
auto se_val = mask_gather<sizeof(scalar_t)>(Vec(0), inp_slice_C_ptr, i_se_offset, se_mask_copy);
// Add this channel's contribution: horizontal corner differences weighted by
// s / n go into gx, vertical corner differences weighted by e / w go into
// gy, both scaled by gOut.
gx = gx + ((ne_val - nw_val) * s + (se_val - sw_val) * n) * gOut;
gy = gy + ((sw_val - nw_val) * e + (se_val - ne_val) * w) * gOut;
}
// Apply the multipliers returned by apply_get_grad.
gx = gx * gx_mult;
gy = gy * gy_mult;
// Interleave gx / gy (presumably as x0, y0, x1, y1, ... - see interleave2)
// and store 2 * len values at gGrid + offset * 2: the first vector takes up
// to `step` of them, the second takes the remainder, which is clamped to
// zero when len * 2 <= step.
constexpr int64_t step = Vec::size();
auto interleaved_gGrid = interleave2(gx, gy);
auto gGrid_ptr = gGrid_slice.data() + offset * 2;
std::get<0>(interleaved_gGrid).store(gGrid_ptr,
std::min(len * 2, step));
std::get<1>(interleaved_gGrid).store(gGrid_ptr + step,
std::max(static_cast<int64_t>(0), len * 2 - step));
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free