Vectorized Class — pytorch Architecture
Architecture documentation for the Vectorized<c10::qint32> template specialization defined in vec512_qint.h in the PyTorch codebase.
Entity Profile
Source Code
aten/src/ATen/cpu/vec/vec512/vec512_qint.h lines 1112–1230
// Specialization of Vectorized for c10::qint32, layered on top of
// VectorizedQuantizedConverter with 16 as its last template argument. Every
// operation defined here is a plain per-element loop over `vals`.
// NOTE(review): `vals`, `size()`, `value_type`, `float_num_vecs()` and the
// *_vec_return_type aliases are not declared in this struct; presumably they
// are inherited from VectorizedQuantizedConverter -- confirm against the base.
template <>
struct Vectorized<c10::qint32> : public VectorizedQuantizedConverter<
                                     c10::qint32,
                                     std::array<Vectorized<float>, 1>,
                                     std::array<Vectorized<c10::qint32>, 1>,
                                     16> {
  // Default construction; forwards to the base's default constructor.
  Vectorized()
      : VectorizedQuantizedConverter<
            c10::qint32,
            std::array<Vectorized<float>, 1>,
            std::array<Vectorized<c10::qint32>, 1>,
            16>() {}

  // Constructs from a single quantized value; forwards `val` to the base.
  Vectorized(c10::qint32 val)
      : VectorizedQuantizedConverter<
            c10::qint32,
            std::array<Vectorized<float>, 1>,
            std::array<Vectorized<c10::qint32>, 1>,
            16>(val) {}

  // Constructs from a raw pointer; forwards `ptr` to the base.
  Vectorized(const void* ptr)
      : VectorizedQuantizedConverter<
            c10::qint32,
            std::array<Vectorized<float>, 1>,
            std::array<Vectorized<c10::qint32>, 1>,
            16>(ptr) {}

  // Builds a vector from the memory at `ptr` via the pointer constructor.
  static Vectorized<c10::qint32> loadu(const void* ptr) {
    return Vectorized<c10::qint32>(ptr);
  }

  // Partial load: copies `count` elements from `ptr` into a zero-filled
  // temporary of size() elements and loads the vector from that temporary, so
  // the elements past `count` are zero.
  // No bounds check is performed here: the caller must pass
  // 0 <= count <= size(), otherwise the memcpy overruns `tmp_values`.
  static Vectorized<c10::qint32> loadu(const void* ptr, int64_t count) {
    __at_align__ value_type tmp_values[size()];
    // Ensure uninitialized memory does not change the output value See
    // https://github.com/pytorch/pytorch/issues/32502 for more details. We do
    // not initialize arrays to zero using "={0}" because gcc would compile it
    // to two instructions while a loop would be compiled to one instruction.
    for (const auto i : c10::irange(size())) {
      tmp_values[i] = 0;
    }
    std::memcpy(
        tmp_values,
        reinterpret_cast<const value_type*>(ptr),
        count * sizeof(value_type));
    return loadu(tmp_values);
  }

  // Quantizes the floats in `rhs` using `scale` and `zero_point`.
  // Each float vector is spilled to a flat buffer (16 floats per vector), the
  // buffer is handed to at::native::quantize_vec, and the result is loaded
  // back as a vector. `inverse_scale` is accepted but not used.
  static Vectorized<c10::qint32> quantize(
      const float_vec_return_type& rhs,
      float scale,
      int32_t zero_point,
      float inverse_scale [[maybe_unused]]) {
    std::array<value_type, size()> qvals;
    std::array<float, float_num_vecs() * 16> float_vals;

    for (const auto i : c10::irange(float_num_vecs())) {
      rhs[i].store(&float_vals[i * 16], 16);
    }

    // quantize_vec writes through a c10::qint32*, while `qvals` holds the
    // underlying value_type; the named cast makes that reinterpretation
    // explicit instead of hiding it behind a C-style cast.
    at::native::quantize_vec<c10::qint32, /*precision=*/32>(
        scale,
        zero_point,
        float_vals.data(),
        reinterpret_cast<c10::qint32*>(qvals.data()),
        16 * float_num_vecs());

    return Vectorized<c10::qint32>::loadu(qvals.data());
  }

  // Element-wise maximum of this vector and `b`.
  Vectorized<c10::qint32> maximum(Vectorized<c10::qint32> b) const {
    Vectorized<c10::qint32> retval;
    for (const auto i : c10::irange(size())) {
      retval.vals[i] = std::max<value_type>(vals[i], b.vals[i]);
    }
    return retval;
  }

  // Element-wise minimum of this vector and `b`.
  Vectorized<c10::qint32> minimum(Vectorized<c10::qint32> b) const {
    Vectorized<c10::qint32> retval;
    for (const auto i : c10::irange(size())) {
      retval.vals[i] = std::min<value_type>(vals[i], b.vals[i]);
    }
    return retval;
  }

  // ReLU in the quantized domain: element-wise maximum with `zero_point`.
  Vectorized<c10::qint32> relu(Vectorized<c10::qint32> zero_point) const {
    return maximum(zero_point);
  }

  // ReLU6 in the quantized domain: each element is first raised to at least
  // the matching `zero_point` element, then capped at the matching `q_six`
  // element. Marked const (it does not modify this vector) so it can be called
  // on const vectors, consistent with relu/maximum/minimum.
  Vectorized<c10::qint32> relu6(
      Vectorized<c10::qint32> zero_point,
      Vectorized<c10::qint32> q_six) const {
    Vectorized<c10::qint32> retval;
    for (const auto i : c10::irange(size())) {
      retval.vals[i] = std::min<value_type>(
          std::max<value_type>(vals[i], zero_point.vals[i]), q_six.vals[i]);
    }
    return retval;
  }

  // Element-wise `this - b`, written into element 0 of the returned
  // int_vec_return_type. The subtraction is done directly on the stored
  // values with no overflow handling in this function.
  int_vec_return_type widening_subtract(Vectorized<c10::qint32> b) const {
    int_vec_return_type retval;
    for (const auto i : c10::irange(size())) {
      retval[0].vals[i] = vals[i] - b.vals[i];
    }
    return retval;
  }

  // Requantizes element 0 of `inp`: each value is converted to float,
  // multiplied by `multiplier`, rounded with std::nearbyint (which honours the
  // current floating-point rounding mode), offset by `zero_point`, and
  // converted back to value_type.
  static Vectorized<c10::qint32> requantize_from_int(
      const int_vec_return_type& inp,
      float multiplier,
      int32_t zero_point) {
    Vectorized<c10::qint32> retval;
    for (const auto i : c10::irange(size())) {
      // The addition is carried out in float (zero_point is converted to
      // float before being added); the cast spells out the narrowing that was
      // previously implicit in the assignment.
      const float rounded =
          std::nearbyint(static_cast<float>(inp[0].vals[i]) * multiplier);
      retval.vals[i] = static_cast<value_type>(rounded + zero_point);
    }
    return retval;
  }
};
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free