grid_sample_2d_grid_slice_iterator Function Template — pytorch Architecture
Architecture documentation for the grid_sample_2d_grid_slice_iterator function template in GridSamplerKernel.cpp from the pytorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/cpu/GridSamplerKernel.cpp lines 1032–1144
// Visits every output location of one (batch-sliced) sampling grid and hands
// the coordinates to `apply_fn` one SIMD vector at a time.
//
//   grid_slice: 3-d accessor. Dims 0 and 1 are read as out_H and out_W; the
//               x coordinate of a location is read at its base address and
//               the y coordinate one `stride(2)` further on.
//   apply_fn:   called as apply_fn(x, y, spatial_offset, len). `x` and `y` are
//               Vectorized<scalar_t>; `spatial_offset` is the flattened output
//               index that lane 0 corresponds to; `len` (<= Vec::size()) is
//               how many leading lanes carry real grid entries.
//
// Three memory layouts are distinguished, from most to least regular; each
// one is handled by its own section below and returns when done.
template<typename scalar_t, typename ApplyFn>
inline void grid_sample_2d_grid_slice_iterator(
    const TensorAccessor<const scalar_t, 3>& grid_slice, const ApplyFn &apply_fn) {
  using Vec = Vectorized<scalar_t>;
  using iVec = Vectorized<int_same_size_t<scalar_t>>;
  constexpr int64_t step = Vec::size();

  const int64_t n_rows = grid_slice.size(0);
  const int64_t n_cols = grid_slice.size(1);
  const int64_t row_stride = grid_slice.stride(0);
  const int64_t col_stride = grid_slice.stride(1);
  const int64_t coord_stride = grid_slice.stride(2);
  auto base = grid_slice.data();

  // ---------------------------------------------------------------------
  // Case 1: the whole [out_H, out_W, 2] slice is contiguous.
  // Coordinates are interleaved in memory ({x0, y0, x1, y1, ...}), so two
  // consecutive vectors are loaded and split into an x vector and a y vector
  // with deinterleave2.
  // ---------------------------------------------------------------------
  if (at::geometry_is_contiguous({n_rows, n_cols, 2}, {row_stride, col_stride, coord_stride})) {
    const int64_t n_points = n_rows * n_cols;
    for (int64_t pos = 0; pos < n_points; pos += step) {
      const int64_t len = std::min(step, n_points - pos);
      // Each location contributes two scalars (x and y).
      const int64_t n_scalars = len * 2;
      auto chunk = base + pos * 2;
      // The first load takes up to `step` scalars; the second takes whatever
      // is left over (possibly nothing).
      auto lo = Vec::loadu(chunk, std::min(step, n_scalars));
      auto hi = Vec::loadu(chunk + step,
                           std::max(static_cast<int64_t>(0), n_scalars - step));
      auto [xs, ys] = deinterleave2(lo, hi);
      if (len < step) {
        // Partial vector: force the lanes beyond `len` to 0 so that every
        // lane is a valid grid sample location.
        xs = Vec::set(Vec(0), xs, len);
        ys = Vec::set(Vec(0), ys, len);
      }
      apply_fn(xs, ys, pos, len);
    }
    return;
  }

  // ---------------------------------------------------------------------
  // Case 2: the W dimension is contiguous (or has a single element).
  // Common when the grid comes from a conv net output of shape [N, 2, H, W].
  // The x values and the y values form two separate contiguous runs, so one
  // vector is loaded directly from each.
  // ---------------------------------------------------------------------
  if (col_stride == 1 || n_cols == 1) {
    // Processes `count` consecutive locations whose x values start at `xs_ptr`
    // and y values start at `ys_ptr`; `first_out` is the flattened output
    // index of the first location.
    auto run_line = [&](const scalar_t *xs_ptr, const scalar_t *ys_ptr,
                        int64_t first_out, int64_t count) {
      for (int64_t done = 0; done < count; done += step) {
        const int64_t len = std::min(step, count - done);
        auto xs = Vec::loadu(xs_ptr + done, len);
        auto ys = Vec::loadu(ys_ptr + done, len);
        if (len < step) {
          // Partial vector: force the lanes beyond `len` to 0 so that every
          // lane is a valid grid sample location.
          xs = Vec::set(Vec(0), xs, len);
          ys = Vec::set(Vec(0), ys, len);
        }
        apply_fn(xs, ys, first_out + done, len);
      }
    };

    if (at::geometry_is_contiguous({n_rows, n_cols}, {row_stride, col_stride})) {
      // [H, W] as a whole is contiguous: treat it as one long line.
      run_line(base, base + coord_stride, 0, n_rows * n_cols);
      return;
    }

    // Only each individual row is contiguous: one line per h.
    for (const auto h : c10::irange(n_rows)) {
      auto row = base + h * row_stride;
      run_line(row, row + coord_stride, h * n_cols, n_cols);
    }
    return;
  }

  // ---------------------------------------------------------------------
  // Case 3: general strides.
  // Loop over H; within each row, walk W in vector-sized steps and fetch the
  // x and y vectors with at::vec::gather.
  // ---------------------------------------------------------------------
  int64_t out_index = 0;
  // Distance (in elements) between the starts of two consecutive vectors
  // along W.
  const int64_t vec_advance = col_stride * step;
#if !defined(_MSC_VER) && !defined(COMPILING_FOR_MIN_SIZE)
# pragma unroll
#endif
  for (const auto h : c10::irange(n_rows)) {
    auto x_cursor = base + h * row_stride;
    auto y_cursor = x_cursor + coord_stride;
    // Per-lane element offsets for the gather, rebuilt for every row because
    // the tail iteration below overwrites them.
    auto lane_offsets = iVec::arange(0, col_stride);
#if !defined(_MSC_VER) && !defined(COMPILING_FOR_MIN_SIZE)
# pragma unroll
#endif
    for (int64_t w = 0; w < n_cols; w += step) {
      const int64_t len = std::min(step, n_cols - w);
      if (len < step) {
        // Prevents illegal memory access: the offsets of the lanes beyond
        // `len` are set to zero.
        lane_offsets = iVec::set(iVec(0), lane_offsets, len);
      }
      auto xs = vec::gather<sizeof(scalar_t)>(x_cursor, lane_offsets);
      auto ys = vec::gather<sizeof(scalar_t)>(y_cursor, lane_offsets);
      apply_fn(xs, ys, out_index, len);

      x_cursor += vec_advance;
      y_cursor += vec_advance;
      out_index += len;
    }
  }
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free