transpose_mxn Function — pytorch Architecture
Architecture documentation for the transpose_mxn<float> function template specialization in vec512_float.h from the pytorch codebase.
Entity Profile
Source Code
aten/src/ATen/cpu/vec/vec512/vec512_float.h lines 887–922
// Transposes an M x N float matrix from `src` into `dst`, working in tiles of
// up to 16 x 16 elements.
//
// The tile whose top-left source element is at row i, column j is read from
// `src + i * ld_src + j` and written to `dst + j * ld_dst + i`; the per-tile
// work is delegated to transpose_mxn_16x16 (defined elsewhere in this file).
//
// Parameters:
//   src     - pointer to the first element of the source matrix.
//   ld_src  - leading dimension of `src`: element distance between the starts
//             of consecutive source rows.
//   dst     - pointer to the first element of the destination matrix.
//   ld_dst  - leading dimension of `dst`.
//   M       - number of source rows.
//   N       - number of source columns.
//
// Full 16 x 16 tiles are processed first; partial tiles along the right edge
// (N % 16 columns) and the bottom edge (M % 16 rows) are passed to the helper
// with their actual sizes. The helper is never called with an empty tile.
template <>
inline void transpose_mxn<float>(
    const float* src,
    int64_t ld_src,
    float* dst,
    int64_t ld_dst,
    int M,
    int N) {
  // Loop-invariant bounds: the largest multiples of 16 not exceeding M and N.
  const int64_t m_full = M / 16 * 16;
  const int64_t n_full = N / 16 * 16;

  int64_t i = 0;
  for (; i < m_full; i += 16) {
    int64_t j = 0;
    for (; j < n_full; j += 16) {
      transpose_mxn_16x16(
          src + i * ld_src + j, ld_src, dst + j * ld_dst + i, ld_dst, 16, 16);
    }
    // handle remainder j
    const int nrem = static_cast<int>(N - j);
    if (nrem > 0) {
      transpose_mxn_16x16(
          src + i * ld_src + j, ld_src, dst + j * ld_dst + i, ld_dst, 16, nrem);
    }
  }

  // handle remainder i
  const int mrem = static_cast<int>(M - i);
  if (mrem > 0) {
    // Same index type as the main loop so the offset arithmetic is uniformly
    // 64-bit.
    int64_t j = 0;
    for (; j < n_full; j += 16) {
      transpose_mxn_16x16(
          src + i * ld_src + j, ld_src, dst + j * ld_dst + i, ld_dst, mrem, 16);
    }
    // handle remainder j
    const int nrem = static_cast<int>(N - j);
    // Guarded like the main loop: when N is a multiple of 16 there is no
    // corner tile left, so do not invoke the helper with zero columns.
    if (nrem > 0) {
      transpose_mxn_16x16(
          src + i * ld_src + j, ld_src, dst + j * ld_dst + i, ld_dst, mrem, nrem);
    }
  }
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free