cpu_channel_shuffle Function — PyTorch Architecture
Architecture documentation for the cpu_channel_shuffle function template in ChannelShuffleKernel.cpp from the PyTorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/cpu/ChannelShuffleKernel.cpp lines 15–58
// Channel shuffle on CPU.
//
// The input is viewed as [n, g, oc, ...] and the output as [n, oc, g, ...],
// i.e. the group dimension and the per-group channel dimension are swapped.
// Works for 3d/4d/5d tensors; parallelized over the flattened n * c range.
template <typename scalar_t>
void cpu_channel_shuffle(
    TensorBase& output,
    const TensorBase& input,
    int64_t groups) {
  auto in_data = input.data_ptr<scalar_t>();
  auto out_data = output.data_ptr<scalar_t>();

  int64_t nbatch = input.size(0);
  int64_t channels = input.size(1);
  int64_t channels_per_group = channels / groups;
  int64_t image_size = input.numel() / nbatch / channels;

  using Vec = vec::Vectorized<scalar_t>;
  // Largest multiple of the vector width within one image plane; the
  // remaining elements are copied by a scalar tail loop.
  int64_t vec_end = image_size - (image_size % Vec::size());

  at::parallel_for(0, nbatch * /* oc*g */ channels, 0, [&](int64_t begin, int64_t end) {
    // Decompose the flat output index `begin` into (n, oc, g).
    int64_t n = 0;
    int64_t oc = 0;
    int64_t g = 0;
    data_index_init(begin, n, nbatch, oc, channels_per_group, g, groups);

    for (int64_t i = begin; i < end; ++i) {
      scalar_t* dst = out_data + i * image_size;
      // Source plane lives at logical index [n, g, oc] of the input.
      const scalar_t* src = in_data +
          (n * channels + g * channels_per_group + oc) * image_size;

      int64_t d = 0;
      for (; d < vec_end; d += Vec::size()) {
        Vec::loadu(src + d).store(dst + d);
      }
      for (; d < image_size; ++d) {
        dst[d] = c10::load(&src[d]);
      }

      // Advance (n, oc, g) to the next flat output index.
      data_index_step(n, nbatch, oc, channels_per_group, g, groups);
    }
  });
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free