apply_norm_per_row Function — pytorch Architecture
Architecture documentation for the apply_norm_per_row function template in WeightNormKernel.cpp from the pytorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/cpu/WeightNormKernel.cpp lines 105–127
// Element-wise scaling of one row: w_ptr[i] = v_ptr[i] * a_ptr[i] for every
// i in [0, size). This overload is selected only when
// is_reduced_floating_point_v<scalar_t> is true.
//
//   w_ptr  destination row (scalar_t), written for all `size` elements
//   v_ptr  source row (scalar_t), read-only
//   a_ptr  per-element float multipliers, read-only
//   size   number of elements to process
//
// The multiplication is carried out in float: each scalar_t vector is
// widened into two float vectors, scaled, and then narrowed back to
// scalar_t before being stored.
template <typename scalar_t>
inline std::enable_if_t<is_reduced_floating_point_v<scalar_t>, void>
apply_norm_per_row(
    scalar_t* w_ptr,
    const scalar_t* v_ptr,
    const float* a_ptr,
    int64_t size) {
  using bVec = vec::Vectorized<scalar_t>;
  using fVec = vec::Vectorized<float>;

  // Largest multiple of the scalar_t vector width that does not exceed size;
  // everything below this index is handled one full vector at a time.
  const int64_t vec_end = size - (size % bVec::size());

  int64_t i = 0;
  while (i < vec_end) {
    // Widen one scalar_t vector into a low/high pair of float vectors.
    bVec src = bVec::loadu(v_ptr + i);
    auto [src_lo, src_hi] = vec::convert_to_float<scalar_t>(src);

    // The float multipliers for the two halves sit back-to-back in a_ptr,
    // fVec::size() elements apart.
    // NOTE(review): presumably bVec::size() == 2 * fVec::size() — confirm.
    fVec scaled_lo = fVec::loadu(a_ptr + i) * src_lo;
    fVec scaled_hi = fVec::loadu(a_ptr + i + fVec::size()) * src_hi;

    // Narrow back to scalar_t and write the result.
    bVec dst = vec::convert_from_float<scalar_t>(scaled_lo, scaled_hi);
    dst.store(w_ptr + i);

    i += bVec::size();
  }

  // Scalar tail for the remaining (size % bVec::size()) elements.
  for (; i < size; ++i) {
    w_ptr[i] = static_cast<float>(v_ptr[i]) * a_ptr[i];
  }
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free