run_parallel_pdist Function — PyTorch Architecture
Architecture documentation for the run_parallel_pdist function template in DistanceOpsKernel.cpp from the PyTorch codebase. The function fills the pdist result tensor in parallel on CPU by flattening the upper triangle of the pairwise distance matrix into a single index range and recovering the row pair (i, j) from each flat index k.
Entity Profile
Source Code
aten/src/ATen/native/cpu/DistanceOpsKernel.cpp lines 147–186
template <typename F>
static void run_parallel_pdist(Tensor& result, const Tensor& self, const scalar_t p) {
  // Note: in the source file this function is a static member of
  // template <typename scalar_t> struct Dist, which supplies scalar_t and
  // Vec (an alias for vec::Vectorized<scalar_t>).
  const scalar_t * const self_start = self.const_data_ptr<scalar_t>();
  const scalar_t * const self_end = self_start + self.numel();
  int64_t n = self.size(0);
  int64_t m = self.size(1);

  scalar_t * const res_start = result.data_ptr<scalar_t>();
  int64_t combs = result.numel(); // n * (n - 1) / 2

  // We conceptually iterate over tuples of (i, j, k) where i is the first
  // vector from the input, j is the second, and k is the result index. This
  // parallelizes over the range of k and infers what i and j are from the
  // value of k.
  // The grain size shrinks with the row length m, so each parallel task
  // covers a comparable amount of work.
  parallel_for(0, combs, internal::GRAIN_SIZE / (16 * m), [p, self_start, self_end, n, m, res_start](int64_t k, int64_t end) {
    const Vec pvec(p);
    double n2 = static_cast<double>(n) - .5;
    // The -1 accounts for floating point truncation issues
    int64_t i = static_cast<int64_t>((n2 - std::sqrt(n2 * n2 - 2.0 * static_cast<double>(k) - 1.0)));
    int64_t j = k - n * i + i * (i + 1) / 2 + i + 1;

    const scalar_t * self_i = self_start + i * m;
    const scalar_t * self_j = self_start + j * m;
    scalar_t * res = res_start + k;
    const scalar_t * const res_end = res_start + end;
    while (res != res_end) {
      // map2_reduce_all applies F::map to |a - b| element-wise across the
      // two rows and folds the results with F::red; F::finish produces the
      // final distance (e.g. the 1/p power for the p-norm).
      *res = F::finish(vec::map2_reduce_all<scalar_t>(
          [&pvec](Vec a, Vec b) { return F::map((a - b).abs(), pvec); },
          F::red, self_i, self_j, m), p);
      res += 1;
      self_j += m;
      // When j runs off the end of the input, advance to the next row pair:
      // (i, j) -> (i + 1, i + 2).
      if (self_j == self_end) {
        self_i += m;
        self_j = self_i + m;
      }
    }
  });
}
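The index math works as follows: for a pair (i, j) with i < j, enumerated in row-major order over the upper triangle, the flat result index is k = i*n - i*(i+1)/2 + (j - i - 1). The kernel inverts this closed-form by taking i = floor(n2 - sqrt(n2*n2 - 2k)) with n2 = n - 0.5, with an extra -1.0 under the square root to keep floating-point truncation from landing one row too high. A minimal standalone sketch (not part of ATen) that brute-force checks this inversion against direct enumeration:

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstdio>

int main() {
  for (int64_t n = 2; n <= 200; ++n) {
    int64_t k = 0;
    // Enumerate all pairs (i, j) with i < j in the same order the kernel
    // writes its results, and confirm the closed-form recovery matches.
    for (int64_t i = 0; i < n; ++i) {
      for (int64_t j = i + 1; j < n; ++j, ++k) {
        double n2 = static_cast<double>(n) - 0.5;
        int64_t ii = static_cast<int64_t>(
            n2 - std::sqrt(n2 * n2 - 2.0 * static_cast<double>(k) - 1.0));
        int64_t jj = k - n * ii + ii * (ii + 1) / 2 + ii + 1;
        assert(ii == i && jj == j);
      }
    }
  }
  std::printf("index inversion verified\n");
  return 0;
}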
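The functor parameter F supplies the three stages of the reduction: F::map transforms each element-wise absolute difference, F::red folds partial results together, and F::finish applies a final transform to the folded value. Below is a scalar sketch of that contract, assuming a hypothetical PDistCalc functor for the general p-norm; the real file defines vectorized functors, and the names here are purely illustrative:

#include <cmath>
#include <cstdio>

// Hypothetical scalar analogue of the F contract for the general p-norm:
//   map(d, p)      -> d^p     (per-element transform)
//   red(a, b)      -> a + b   (fold combining partial results)
//   finish(agg, p) -> agg^(1/p) (final transform of the folded value)
struct PDistCalc {
  static double map(double diff, double p) { return std::pow(diff, p); }
  static double red(double a, double b) { return a + b; }
  static double finish(double agg, double p) { return std::pow(agg, 1.0 / p); }
};

// Scalar version of the kernel's inner-loop body: the distance between
// two rows x and y of length m.
template <typename F>
double row_distance(const double* x, const double* y, int m, double p) {
  double agg = 0.0;
  for (int t = 0; t < m; ++t) {
    agg = F::red(agg, F::map(std::fabs(x[t] - y[t]), p));
  }
  return F::finish(agg, p);
}

int main() {
  const double x[] = {0.0, 0.0};
  const double y[] = {3.0, 4.0};
  // For p = 2 this is the Euclidean distance: prints 5.000000.
  std::printf("%f\n", row_distance<PDistCalc>(x, y, 2, 2.0));
  return 0;
}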