minimum Function — pytorch Architecture
Architecture documentation for the minimum function (the Vectorized<BFloat16> template specialization) in vec512_bfloat16.h from the pytorch codebase.
Entity Profile
Source Code
aten/src/ATen/cpu/vec/vec512/vec512_bfloat16.h lines 950–971
// Lane-wise minimum of two BFloat16 vectors with NaN propagation.
//
// The inputs are expanded into fp32 halves, the minimum is taken per half,
// and every lane in which the comparison of a and b is unordered (i.e. at
// least one operand is NaN) is forced to the all-ones bit pattern, which is
// a NaN encoding. The two halves are then handed to cvtfp32_bf16 to build
// the returned vector.
template <>
Vectorized<BFloat16> inline minimum(
    const Vectorized<BFloat16>& a,
    const Vectorized<BFloat16>& b) {
  // NOTE(review): cvtbf16_fp32 / cvtfp32_bf16 are defined elsewhere in the
  // file; presumably they widen bf16 lanes into a low/high pair of fp32
  // vectors and narrow them back -- confirm against their definitions.
  __m512 lhs_lo, lhs_hi;
  cvtbf16_fp32(__m512i(a), lhs_lo, lhs_hi);
  __m512 rhs_lo, rhs_hi;
  cvtbf16_fp32(__m512i(b), rhs_lo, rhs_hi);

  // Lanes that are not flagged as unordered keep this zero fill, so the OR
  // below leaves their minimum untouched.
  __m512i all_zero = _mm512_set1_epi32(0);

  // ---- low half ----
  auto lo_min = _mm512_min_ps(lhs_lo, rhs_lo);
  auto lo_unordered = _mm512_cmp_ps_mask(lhs_lo, rhs_lo, _CMP_UNORD_Q);
  auto lo_nan_bits = _mm512_castsi512_ps(
      _mm512_mask_set1_epi32(all_zero, lo_unordered, 0xFFFFFFFF));
  // OR-ing with all-ones turns the lane into a NaN bit pattern.
  auto lo_result = _mm512_or_ps(lo_min, lo_nan_bits);

  // ---- high half ----
  auto hi_min = _mm512_min_ps(lhs_hi, rhs_hi);
  auto hi_unordered = _mm512_cmp_ps_mask(lhs_hi, rhs_hi, _CMP_UNORD_Q);
  auto hi_nan_bits = _mm512_castsi512_ps(
      _mm512_mask_set1_epi32(all_zero, hi_unordered, 0xFFFFFFFF));
  auto hi_result = _mm512_or_ps(hi_min, hi_nan_bits);

  return cvtfp32_bf16(lo_result, hi_result);
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free