run_backward_parallel_pdist Function — PyTorch Architecture
Architecture documentation for the run_backward_parallel_pdist function in DistanceOpsKernel.cpp from the PyTorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/cpu/DistanceOpsKernel.cpp lines 292–320
template <typename F>
static void run_backward_parallel_pdist(Tensor& result, const Tensor & grad, const Tensor & self, const scalar_t p, const Tensor& dist) {
  const int64_t n = self.size(0);
  const int64_t m = self.size(1);
  const int64_t gs = grad.stride(0);

  const scalar_t * const grad_start = grad.const_data_ptr<scalar_t>();
  const scalar_t * const dist_start = dist.const_data_ptr<scalar_t>();
  const scalar_t * const self_start = self.const_data_ptr<scalar_t>();
  scalar_t * const res_start = result.data_ptr<scalar_t>();

  // The only way to parallelize and avoid locking requires parallelizing
  // over the columns of the input, i.e. we compute the gradient for the
  // first section of each vector independently of the second section, etc.
  at::parallel_for(0, m / Vec::size(), internal::GRAIN_SIZE / (8 * n * n), [p, n, m, gs, grad_start, dist_start, self_start, res_start](int64_t l, int64_t end) {
    const Vec pvec(p);

    const scalar_t * self_l = self_start + l * Vec::size();
    scalar_t * res_l = res_start + l * Vec::size();

    for (const scalar_t * const res_end = res_start + end * Vec::size(); res_l != res_end; self_l += Vec::size(), res_l += Vec::size()) {
      backward_down_column_pdist<F>(self_l, res_l, grad_start, dist_start, pvec, n, m, gs);
    }
  });

  const int64_t remainder = m % Vec::size();
  if (remainder) {
    backward_down_column_pdist<F>(self_start + (m - remainder), res_start + (m - remainder), grad_start, dist_start, Vec(p), n, m, gs, remainder);
  }
}
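Context: in the PyTorch source this function is a static member of the Dist<scalar_t> helper struct defined earlier in DistanceOpsKernel.cpp, which is why scalar_t and Vec (an alias for the vectorized type vec::Vectorized<scalar_t>) appear here without being template parameters. The comment in the body captures the key constraint: each pairwise distance couples two rows of self, so the backward contribution of pair (i, j) writes into both row i and row j of the gradient. Parallelizing over rows or over pairs would therefore need locks or atomics; parallelizing over columns instead gives each worker exclusive ownership of a slab of columns across all rows. The grain size internal::GRAIN_SIZE / (8 * n * n) reflects that each column chunk costs work proportional to n² — one pass over all row pairs — with the 8 acting as a heuristic scale factor.

To make that ownership pattern concrete, here is a minimal standalone sketch — not PyTorch code — that mirrors the same chunking, with std::thread standing in for at::parallel_for and a fixed width-4 lane standing in for Vec::size(); down_column is an illustrative stand-in for backward_down_column_pdist:

#include <cstdint>
#include <thread>
#include <vector>

constexpr int64_t kVecSize = 4;  // stand-in for Vec::size()

// Stand-in for backward_down_column_pdist: each call touches only the
// `count` columns starting at `col`, in every row of `res`, so threads
// working on disjoint column ranges never write the same element.
static void down_column(const double* self, double* res,
                        int64_t n, int64_t m, int64_t col, int64_t count) {
  for (int64_t i = 0; i < n; ++i) {
    for (int64_t j = 0; j < count; ++j) {
      res[i * m + col + j] += self[i * m + col + j];
    }
  }
}

static void run_columns_parallel(const double* self, double* res,
                                 int64_t n, int64_t m, int64_t num_threads) {
  const int64_t chunks = m / kVecSize;  // full vector-width column chunks
  std::vector<std::thread> pool;
  for (int64_t t = 0; t < num_threads; ++t) {
    // Contiguous range of chunks owned exclusively by thread t.
    const int64_t begin = chunks * t / num_threads;
    const int64_t end = chunks * (t + 1) / num_threads;
    pool.emplace_back([=] {
      for (int64_t l = begin; l < end; ++l) {
        down_column(self, res, n, m, l * kVecSize, kVecSize);
      }
    });
  }
  for (auto& th : pool) th.join();

  // Trailing columns that don't fill a full vector lane, handled serially —
  // this mirrors the `remainder` branch after at::parallel_for above.
  const int64_t remainder = m % kVecSize;
  if (remainder) {
    down_column(self, res, n, m, m - remainder, remainder);
  }
}

int main() {
  const int64_t n = 4, m = 10;
  std::vector<double> input(n * m, 1.0), output(n * m, 0.0);
  run_columns_parallel(input.data(), output.data(), n, m, /*num_threads=*/2);
}

Because each thread's chunk range [begin, end) is disjoint, no element of res is ever written by two threads — exactly the property the real kernel relies on to stay lock-free.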
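For orientation on how this kernel is reached, a hedged usage sketch via the C++ frontend: calling torch::pdist and running backward on a CPU tensor is expected to end up in this routine (the exact dispatch chain through autograd and the pdist_backward stub registration is an assumption about wiring, not shown in this excerpt):

#include <torch/torch.h>
#include <iostream>

int main() {
  // 5 vectors of dimension 3; requires_grad so backward has work to do.
  auto x = torch::randn({5, 3}, torch::requires_grad());
  // Condensed pairwise distance vector of length n*(n-1)/2 = 10.
  auto d = torch::pdist(x, /*p=*/2.0);
  // On CPU, this backward should exercise the parallel pdist kernel above.
  d.sum().backward();
  std::cout << x.grad() << '\n';
}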