avg_pool2d_out_frame Function Template — pytorch Architecture
Architecture documentation for the avg_pool2d_out_frame function template in AveragePool2d.cpp from the pytorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/quantized/cpu/AveragePool2d.cpp lines 33–117
/**
 * Average-pools every plane of a quantized input and writes the requantized
 * result into `output`.
 *
 * The input is made contiguous and then addressed as `nInputPlane` planes of
 * `inputHeight * inputWidth` elements; the output is addressed as
 * `nInputPlane` planes of `outputHeight * outputWidth` elements. Planes are
 * distributed over threads with at::parallel_for.
 *
 * For each output pixel the raw quantized values inside the (clipped) pooling
 * window are summed, the input zero point is removed, the sum is scaled by
 * `input.q_scale() / output.q_scale() / divide_factor`, the output zero point
 * is added, and the rounded value is saturated to the range of
 * `scalar_t::underlying`.
 *
 * @param input              quantized source tensor (q_scale / q_zero_point
 *                           are read from it).
 * @param output             quantized destination tensor. Its data pointer is
 *                           used directly, without a contiguous() call.
 *                           NOTE(review): presumably allocated contiguous by
 *                           the caller - confirm.
 * @param nInputPlane        number of planes to process.
 * @param inputWidth         width of one input plane.
 * @param inputHeight        height of one input plane.
 * @param outputWidth        width of one output plane.
 * @param outputHeight       height of one output plane.
 * @param kW, kH             pooling window width / height.
 * @param dW, dH             horizontal / vertical stride.
 * @param padW, padH         horizontal / vertical padding.
 * @param count_include_pad  when true (and no divisor override is given) the
 *                           divisor is the window size including padding;
 *                           otherwise it is the number of in-bounds elements.
 * @param divisor_override   if set, used as the divisor for every window.
 *                           NOTE(review): a zero divisor is not rejected here;
 *                           presumably validated by the caller - confirm.
 */
template <typename scalar_t>
void avg_pool2d_out_frame(
    const Tensor& input,
    Tensor& output,
    int64_t nInputPlane,
    int64_t inputWidth,
    int64_t inputHeight,
    int64_t outputWidth,
    int64_t outputHeight,
    int kW,
    int kH,
    int dW,
    int dH,
    int padW,
    int padH,
    bool count_include_pad,
    std::optional<int64_t> divisor_override) {
  using underlying_t = typename scalar_t::underlying;

  Tensor input_contig = input.contiguous();
  auto input_data = input_contig.data_ptr<scalar_t>();
  auto output_data = output.data_ptr<scalar_t>();
  const auto scale_factor = input.q_scale() / output.q_scale();
  const auto input_zero_point = input.q_zero_point();
  const auto output_zero_point = output.q_zero_point();

  // Saturation bounds, kept as double so that every bound of an integer type
  // up to 32 bits wide is represented exactly. They do not depend on the
  // plane or pixel, so they are computed once.
  const double lower_bound =
      static_cast<double>(std::numeric_limits<underlying_t>::lowest());
  const double upper_bound =
      static_cast<double>(std::numeric_limits<underlying_t>::max());

  at::parallel_for(0, nInputPlane, 0, [&](int64_t start, int64_t end) {
    for (const auto k : c10::irange(start, end)) {
      /* For all output pixels... */
      scalar_t* ptr_output = output_data + k * outputWidth * outputHeight;
      const scalar_t* ptr_input = input_data + k * inputWidth * inputHeight;
      for (int64_t yy = 0; yy < outputHeight; yy++) {
        for (int64_t xx = 0; xx < outputWidth; xx++) {
          /* Compute the mean of the input image... */
          int64_t hstart = yy * dH - padH;
          int64_t wstart = xx * dW - padW;
          int64_t hend = std::min(hstart + kH, inputHeight + padH);
          int64_t wend = std::min(wstart + kW, inputWidth + padW);
          // Window size measured on the padded image (padding included).
          const int64_t pool_size = (hend - hstart) * (wend - wstart);
          // Clip the window to the real image.
          hstart = std::max(hstart, static_cast<int64_t>(0));
          wstart = std::max(wstart, static_cast<int64_t>(0));
          hend = std::min(hend, inputHeight);
          wend = std::min(wend, inputWidth);
          // Number of in-bounds elements actually summed.
          const int64_t size = (hend - hstart) * (wend - wstart);

          int64_t divide_factor = 0;
          if (divisor_override.has_value()) {
            divide_factor = divisor_override.value();
          } else if (count_include_pad) {
            divide_factor = pool_size;
          } else {
            divide_factor = size;
          }

          // Accumulate in 64 bits: a 32-bit accumulator can overflow when the
          // underlying type is itself 32 bits wide.
          int64_t sum_int = 0;
          for (int64_t ky = hstart; ky < hend; ky++) {
            for (int64_t kx = wstart; kx < wend; kx++) {
              sum_int += (ptr_input + ky * inputWidth + kx)->val_;
            }
          }
          // Every summed element carries one input zero point.
          sum_int -= size * input_zero_point;

          const float multiplier =
              static_cast<float>(scale_factor / divide_factor);
          const float sum = static_cast<float>(sum_int);

          /* Update output by requantizing the result */
          const double rounded =
              std::nearbyint(sum * multiplier + output_zero_point);
          // Saturate while still in floating point: converting an
          // out-of-range or NaN value to an integer type is undefined
          // behaviour. The comparisons are ordered so NaN maps to the lower
          // bound.
          double clamped = lower_bound;
          if (rounded >= upper_bound) {
            clamped = upper_bound;
          } else if (rounded > lower_bound) {
            clamped = rounded;
          }
          ptr_output->val_ = static_cast<underlying_t>(clamped);
          ptr_output++;
        }
      }
    }
  });
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free