Home / Class/ declval Class — pytorch Architecture

declval Class — pytorch Architecture

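Architecture documentation for the entity indexed as "declval" in vec256_zarch.h from the PyTorch codebase. The source shown below is the `Vectorized<T>` partial specialization for complex element types; `std::declval` appears only inside its `underline_type` alias.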

Entity Profile

Source Code

aten/src/ATen/cpu/vec/vec256/zarch/vec256_zarch.h lines 2234–2683

// Partial specialization of Vectorized for the complex element types accepted
// by is_zarch_implemented_complex<T>().
//
// The complex values are stored interleaved (real, imag, real, imag, ...) in
// a Vectorized of the underlying real type (`_vec`); see the element-wise
// constructors below. Most operations either forward to that inner vector or
// fall back to a per-element scalar call through mapOrdinary().
template <typename T>
struct Vectorized<T, std::enable_if_t<is_zarch_implemented_complex<T>()>> {
 public:
  // Real scalar type of the complex element (the type returned by T::imag()).
  using underline_type = decltype(std::declval<T>().imag());
  using value_type = T;
  using vtype = ZSimdVect<underline_type>;
  using vmaskType = ZSimdVectBinary<underline_type>;
  // Vector of real scalars that actually holds the interleaved data.
  using vinner_type = Vectorized<underline_type>;
  using size_type = int;
  using vinner_data = typename Vectorized<underline_type>::vinner_data;

  // Number of complex elements held by one vector.
  static constexpr size_type size() {
    return VECTOR_WIDTH / sizeof(value_type);
  }

 private:
  vinner_type _vec;

 public:
  // Leaves _vec default-constructed.
  Vectorized() {}

  // Wraps already-interleaved inner data (a pair of registers).
  C10_ALWAYS_INLINE Vectorized(const vinner_data& v)
      : _vec{v.first, v.second} {}

  // Element-wise constructor for 16-byte complex types (two elements per
  // vector, e.g. c10::complex<double>).
  template <typename U = T, std::enable_if_t<(sizeof(U) == 16), int> = 0>
  C10_ALWAYS_INLINE Vectorized(T s1, T s2)
      : _vec{s1.real(), s1.imag(), s2.real(), s2.imag()} {}

  // Element-wise constructor for 8-byte complex types (four elements per
  // vector, e.g. c10::complex<float>).
  template <typename U = T, std::enable_if_t<(sizeof(U) == 8), int> = 0>
  C10_ALWAYS_INLINE Vectorized(T s1, T s2, T s3, T s4)
      : _vec{
            s1.real(),
            s1.imag(),
            s2.real(),
            s2.imag(),
            s3.real(),
            s3.imag(),
            s4.real(),
            s4.imag()} {}

  // Broadcast constructors: replicate `s` into every complex element.
  template <typename U = T, std::enable_if_t<(sizeof(U) == 16), int> = 0>
  C10_ALWAYS_INLINE Vectorized(T s) : Vectorized<T>(s, s) {}

  template <typename U = T, std::enable_if_t<(sizeof(U) == 8), int> = 0>
  C10_ALWAYS_INLINE Vectorized(T s) : Vectorized<T>(s, s, s, s) {}

  // Accessors for the underlying real-valued vector and its raw data.
  C10_ALWAYS_INLINE operator vinner_type() const {
    return _vec;
  }

  C10_ALWAYS_INLINE const vinner_type& vec() const {
    return _vec;
  }

  C10_ALWAYS_INLINE operator vinner_data() const {
    return _vec.data();
  }

  C10_ALWAYS_INLINE vinner_data data() const {
    return _vec.data();
  }

  // Loads `count` complex elements from `ptr`. The inner loadu is asked for
  // 2 * count scalars because each complex element is a (real, imag) pair.
  template <typename U>
  static Vectorized<T> C10_ALWAYS_INLINE
  loadu(const U* ptr, int count = size()) {
    return Vectorized<T>{vinner_type::loadu(ptr, 2 * count)};
  }

  // Stores `count` complex elements (2 * count scalars) to `ptr`.
  template <typename U>
  void C10_ALWAYS_INLINE store(U* ptr, int count = size()) const {
    return _vec.store(ptr, 2 * count);
  }

  // Run-time blend of `a` and `b`: the mask is expanded with vec_mergeh and
  // the selection itself is delegated to the inner vector's blendv.
  static Vectorized<T> blendv(
      const Vectorized<T>& a,
      const Vectorized<T>& b,
      const Vectorized<T>& mask) {
    // convert std::complex<V> index mask to V index mask: xy -> xxyy
    vinner_type vmask = mask.vec();
    auto mask_complex = vinner_type(
        vec_mergeh(vmask.vec0(), vmask.vec0()),
        vec_mergeh(vmask.vec1(), vmask.vec1()));
    return Vectorized<T>{vinner_type::blendv(a.vec(), b.vec(), mask_complex)};
  }

  // Compile-time blend: the per-complex-element bit mask is translated by
  // maskForComplex into a mask for the inner real-valued blend.
  template <int64_t mask>
  static auto C10_ALWAYS_INLINE
  blend(const Vectorized<T>& a, const Vectorized<T>& b) {
    constexpr int mask_complex = maskForComplex<sizeof(T)>(mask);
    return Vectorized<T>{
        vinner_type::template blend<mask_complex>(a.vec(), b.vec())};
  }

  // arange for two-element vectors: {base, base + step}.
  template <typename step_t, typename U = T>
  static std::enable_if_t<sizeof(U) == 16, Vectorized<T>> arange(
      T base = 0,
      step_t step = static_cast<step_t>(1)) {
    return Vectorized<T>(base, base + step);
  }

  // arange for four-element vectors:
  // {base, base + step, base + 2 * step, base + 3 * step}.
  template <typename step_t, typename U = T>
  static std::enable_if_t<sizeof(U) == 8, Vectorized<T>> arange(
      T base = 0,
      step_t step = static_cast<step_t>(1)) {
    return Vectorized<T>(
        base,
        base + step,
        base + value_type(2) * step,
        base + value_type(3) * step);
  }

  // Recursion terminator for set(): once Z has reached C (no smaller count
  // matched), the result is `b` unchanged.
  template <int16_t Z, int16_t C>
  static inline std::enable_if_t<(Z >= C), Vectorized<T>> set_inner(
      const Vectorized<T>& a,
      const Vectorized<T>& b,
      size_t count) {
    return b;
  }

  // Recursive step for set(): turns the run-time `count` into the
  // compile-time blend mask allbitset(Z) by trying Z, Z + 1, ... in turn.
  template <int16_t Z, int16_t C>
  static inline std::enable_if_t<(Z < C), Vectorized<T>> set_inner(
      const Vectorized<T>& a,
      const Vectorized<T>& b,
      size_t count) {
    if (count == Z)
      return blend<allbitset(Z)>(a, b);
    else
      return set_inner<Z + 1, C>(a, b, count);
  }

  // Combines `a` and `b` according to `count`:
  //   count == 0          -> a
  //   0 < count < size()  -> blend<allbitset(count)>(a, b)
  //   count >= size()     -> b
  static Vectorized<T> set(
      const Vectorized<T>& a,
      const Vectorized<T>& b,
      size_t count = size()) {
    if (count == 0)
      return a;
    return set_inner<1, size()>(a, b, count);
  }

  // Element access by index is intentionally unavailable.
  const T& operator[](int idx) const = delete;
  T& operator[](int idx) = delete;

  // mapOrdinary: scalar fallback. Each overload rebuilds every complex
  // element from its (real, imag) lanes, calls `f` on it, and packs the
  // results back into a vector. The c10::complex<float> overloads handle four
  // elements, the c10::complex<double> overloads handle two.

  // Unary, `f` takes its argument by const reference (complex<float>).
  template <
      typename U = T,
      std::enable_if_t<std::is_same<U, c10::complex<float>>::value, int> = 0>
  Vectorized<T> mapOrdinary(T (*const f)(const T&)) const {
    auto v0 = _vec.vec0();
    auto v1 = _vec.vec1();
    return Vectorized<T>{
        f(T(v0[0], v0[1])),
        f(T(v0[2], v0[3])),
        f(T(v1[0], v1[1])),
        f(T(v1[2], v1[3]))};
  }

  // Unary, `f` takes its argument by const reference (complex<double>).
  template <
      typename U = T,
      std::enable_if_t<std::is_same<U, c10::complex<double>>::value, int> = 0>
  Vectorized<T> mapOrdinary(T (*const f)(const T&)) const {
    auto v0 = _vec.vec0();
    auto v1 = _vec.vec1();
    return Vectorized<T>{f(T(v0[0], v0[1])), f(T(v1[0], v1[1]))};
  }

  // Unary, `f` takes its argument by value (complex<float>).
  template <
      typename U = T,
      std::enable_if_t<std::is_same<U, c10::complex<float>>::value, int> = 0>
  Vectorized<T> mapOrdinary(T (*const f)(T)) const {
    auto v0 = _vec.vec0();
    auto v1 = _vec.vec1();
    return Vectorized<T>{
        f(T(v0[0], v0[1])),
        f(T(v0[2], v0[3])),
        f(T(v1[0], v1[1])),
        f(T(v1[2], v1[3]))};
  }

  // Unary, `f` takes its argument by value (complex<double>).
  template <
      typename U = T,
      std::enable_if_t<std::is_same<U, c10::complex<double>>::value, int> = 0>
  Vectorized<T> mapOrdinary(T (*const f)(T)) const {
    auto v0 = _vec.vec0();
    auto v1 = _vec.vec1();
    return Vectorized<T>{f(T(v0[0], v0[1])), f(T(v1[0], v1[1]))};
  }

  // Binary: applies f(this[i], b[i]) to every element pair (complex<float>).
  template <
      typename U = T,
      std::enable_if_t<std::is_same<U, c10::complex<float>>::value, int> = 0>
  inline Vectorized<T> mapOrdinary(
      T (*const f)(const T&, const T&),
      const Vectorized<T>& b) const {
    auto v0 = _vec.vec0();
    auto v1 = _vec.vec1();
    auto bvec = b.vec();
    auto b0 = bvec.vec0();
    auto b1 = bvec.vec1();
    T a00 = f(T(v0[0], v0[1]), T(b0[0], b0[1]));
    T a01 = f(T(v0[2], v0[3]), T(b0[2], b0[3]));
    T a02 = f(T(v1[0], v1[1]), T(b1[0], b1[1]));
    T a03 = f(T(v1[2], v1[3]), T(b1[2], b1[3]));
    return Vectorized<T>{a00, a01, a02, a03};
  }

  // Binary: applies f(this[i], b[i]) to every element pair (complex<double>).
  template <
      typename U = T,
      std::enable_if_t<std::is_same<U, c10::complex<double>>::value, int> = 0>
  inline Vectorized<T> mapOrdinary(
      T (*const f)(const T&, const T&),
      const Vectorized<T>& b) const {
    auto v0 = _vec.vec0();
    auto v1 = _vec.vec1();
    auto bvec = b.vec();
    auto b0 = bvec.vec0();
    auto b1 = bvec.vec1();
    U a00 = f(U(v0[0], v0[1]), U(b0[0], b0[1]));
    U a01 = f(U(v1[0], v1[1]), U(b1[0], b1[1]));
    return Vectorized<T>{a00, a01};
  }

  // real_neg (complex<float>): mixes `a.neg()` and `a` with a byte
  // permutation. Indices 0-15 address the first vec_perm operand (a_neg) and
  // 16-31 the second (a), so 4-byte lanes 0 and 2 come from the negated
  // vector and lanes 1 and 3 from the original one.
  // NOTE(review): i.e. real lanes negated, imaginary lanes kept, matching the
  // complex<double> overload below - confirm against vec_perm byte order.
  template <
      typename U = T,
      std::enable_if_t<std::is_same<U, c10::complex<float>>::value, int> = 0>
  static typename Vectorized<T>::vinner_type real_neg(
      const typename Vectorized<T>::vinner_type& a) {
    const auto swap_mask = ZSimdVectBinary<uint8_t>{
        0, 1, 2, 3, 20, 21, 22, 23, 8, 9, 10, 11, 28, 29, 30, 31};

    auto a_neg = a.neg();
    vtype v0 = vec_perm(a_neg.vec0(), a.vec0(), swap_mask);
    vtype v1 = vec_perm(a_neg.vec1(), a.vec1(), swap_mask);
    return {v0, v1};
  }

  // real_neg (complex<double>): lane 0 (real) is taken from the negated
  // vector, lane 1 (imag) from the original.
  template <
      typename U = T,
      std::enable_if_t<std::is_same<U, c10::complex<double>>::value, int> = 0>
  static typename Vectorized<T>::vinner_type real_neg(
      const typename Vectorized<T>::vinner_type& a) {
    auto a_neg = a.neg();
    vtype v0 = {a_neg.vec0()[0], a.vec0()[1]};
    vtype v1 = {a_neg.vec1()[0], a.vec1()[1]};
    return {v0, v1};
  }

  // Lane-wise atan2 of the vector against its (real, imag)-swapped copy,
  // swapped back afterwards; angle() keeps only the real lanes of the result.
  Vectorized<T> angle2_() const {
    auto b_a = _vec.swapped(); // b        a
    return Vectorized<T>{_vec.atan2(b_a).swapped()};
  }

  // Argument of each element, obtained as the real part of angle2_().
  Vectorized<T> angle() const {
    return angle2_().real();
  }

  Vectorized<T> atan() const {
    // atan(z) = i/2 * ln((i + z)/(i - z))
    auto ione = Vectorized<T>{vinner_type(image_one<underline_type>())};
    auto sum = ione + *this;
    auto sub = ione - *this;
    auto ln = (sum / sub).log(); // ln((i + z)/(i - z))
    return ln *
        Vectorized<T>{vinner_type(image_half<underline_type>())}; // i/2*ln()
  }

  // Per-element scalar fallback.
  Vectorized<T> atanh() const {
    return mapOrdinary(std::atanh);
  }

  Vectorized<T> asin() const {
    // asin(z), with z = a + bi
    // = -i*ln(iz + sqrt(1 -z^2))
    // = -i*ln((ai - b) + sqrt(1 - (a + bi)*(a + bi)))
    // = -i*ln((-b + ai) + sqrt(1 - (a**2 - b**2) - 2*abi))
    // The vectorized path below is the one compiled in; the #else branch is
    // the per-element scalar alternative.
#if 1
    vinner_type cnj = conj().vec();
    vinner_type b_a = cnj.swapped();
    vinner_type ab = cnj * b_a;
    vinner_type im = ab + ab;
    vinner_type val_2 = _vec * _vec;
    vinner_type val_2_swapped = val_2.swapped();
    vinner_type re = vinner_type::horizontal_sub_perm(val_2, val_2_swapped);
    re = vinner_type(static_cast<underline_type>(1)) - re;
    constexpr int blend_mask =
        blend_choice<T>(); // 0x0A for complex<double> , 0xAA for complex<float>
    vinner_type blendx = vinner_type::template blend<blend_mask>(re, im);
    auto root = Vectorized<T>(blendx).sqrt();
    auto ln = Vectorized<T>(Vectorized<T>(b_a) + root).log();
    return Vectorized<T>(ln.vec().swapped()).conj();
#else
    return mapOrdinary(std::asin);
#endif
  }

  Vectorized<T> acos() const {
    // acos(z) = pi/2 - asin(z)
    return Vectorized<T>(vinner_type(pi_half<underline_type>())) - asin();
  }

  // The following either call the scalar std:: function per element
  // (mapOrdinary) or forward to the inner real-valued vector, which applies
  // the operation to real and imaginary lanes independently.
  Vectorized<T> sin() const {
    return mapOrdinary(std::sin);
  }
  Vectorized<T> sinh() const {
    return mapOrdinary(std::sinh);
  }
  Vectorized<T> cos() const {
    return mapOrdinary(std::cos);
  }
  Vectorized<T> cosh() const {
    return mapOrdinary(std::cosh);
  }
  Vectorized<T> ceil() const {
    return Vectorized<T>{_vec.ceil()};
  }
  Vectorized<T> floor() const {
    return Vectorized<T>{_vec.floor()};
  }
  Vectorized<T> neg() const {
    return Vectorized<T>(_vec.neg());
  }
  Vectorized<T> round() const {
    return Vectorized<T>{_vec.round()};
  }
  Vectorized<T> tan() const {
    return mapOrdinary(std::tan);
  }
  Vectorized<T> tanh() const {
    return mapOrdinary(std::tanh);
  }
  Vectorized<T> trunc() const {
    return Vectorized<T>{_vec.trunc()};
  }

  // Complex equality built from the lane-wise comparison of the inner
  // vectors: the imaginary-lane result is swapped next to the real-lane
  // result and the two are ANDed.
  Vectorized<T> C10_ALWAYS_INLINE eq(const Vectorized<T>& other) const {
    auto eq = _vec.eq(other._vec); // compares real and imag individually
    // If both real numbers and imag numbers are equal, then the complex numbers
    // are equal
    auto real = eq & vinner_type(real_mask<underline_type>());
    auto imag = (eq & vinner_type(image_mask<underline_type>())).swapped();
    return Vectorized<T>{real & imag};
  }
  // Complex inequality: same scheme as eq(), but the two lane results are
  // ORed.
  Vectorized<T> C10_ALWAYS_INLINE ne(const Vectorized<T>& other) const {
    auto ne = _vec.ne(other._vec); // compares real and imag individually
    // If either real numbers or imag numbers are not equal, then the complex
    // numbers are not equal
    auto real = ne & vinner_type(real_mask<underline_type>());
    auto imag = (ne & vinner_type(image_mask<underline_type>())).swapped();
    return Vectorized<T>{real | imag};
  }

  // Keeps the lanes selected by real_mask and clears the others.
  Vectorized<T> real() const {
    return Vectorized<T>(_vec & vinner_type(real_mask<underline_type>()));
  }
  // Keeps the lanes selected by image_mask, leaving them in place.
  Vectorized<T> imag_() const {
    return Vectorized<T>(_vec & vinner_type(image_mask<underline_type>()));
  }
  // Like imag_(), but the kept lanes are then swapped with their pair
  // neighbour, so the imaginary values end up in the real positions.
  Vectorized<T> imag() const {
    return Vectorized<T>{
        (_vec & vinner_type(image_mask<underline_type>())).swapped()};
  }

  // Complex conjugate: XOR with isign_mask.
  // NOTE(review): presumably flips only the sign bit of the imaginary lanes -
  // confirm against the isign_mask definition.
  Vectorized<T> conj() const {
    return Vectorized<T>(_vec ^ vinner_type(isign_mask<underline_type>()));
  }

  // Squared magnitude helper: after the lane-wise square, adding the swapped
  // copy puts re^2 + im^2 in both lanes of every pair; mergee() is then
  // applied to that sum and its raw data returned.
  vinner_data abs_2_() const {
    auto a = _vec * _vec;
    a = a + a.swapped();
    return a.mergee().data();
  }

  // Scalar |value| wrapped back into T so it fits mapOrdinary's signature.
  static T abs_helper(const T& value) {
    return T(std::abs(value));
  }

  Vectorized<T> abs() const {
    return mapOrdinary(abs_helper);
  }

  Vectorized<T> exp() const {
    return mapOrdinary(std::exp);
  }

  Vectorized<T> exp2() const {
    return mapOrdinary(exp2_impl);
  }

  Vectorized<T> expm1() const {
    return mapOrdinary(std::expm1);
  }

  Vectorized<T> log() const {
    return mapOrdinary(std::log);
  }

  Vectorized<T> log2() const {
    // Natural log scaled lane-wise by the log2e_inv constant.
    auto ret = log();
    return Vectorized<T>{ret._vec * vinner_type(log2e_inv<underline_type>())};
  }

  Vectorized<T> log10() const {
    // Natural log scaled lane-wise by the log10e_inv constant.
    auto ret = log();
    return Vectorized<T>{ret._vec * vinner_type(log10e_inv<underline_type>())};
  }

  Vectorized<T> log1p() const {
    return mapOrdinary(std::log1p);
  }

  Vectorized<T> sgn() const {
    return mapOrdinary(at::native::sgn_impl);
  }

  // Element-wise std::pow(this[i], exp[i]).
  Vectorized<T> pow(const Vectorized<T>& exp) const {
    return mapOrdinary(std::pow, exp);
  }

  Vectorized<T> sqrt() const {
    return mapOrdinary(std::sqrt);
  }

  Vectorized<T> reciprocal() const {
    // 1 / (c + di) is the general quotient (a + bi) / (c + di) with
    // a = 1, b = 0:
    // re = (ac + bd)/abs_2() = c/abs_2()
    // im = (bc - ad)/abs_2() = -d/abs_2()
    // i.e. the conjugate (same XOR as conj()) divided by the squared
    // magnitude.
    vinner_type c_d = _vec ^ vinner_type(isign_mask<underline_type>());
    vinner_type abs = abs_2_();
    return Vectorized<T>{c_d / abs};
  }

  Vectorized<T> rsqrt() const {
    return sqrt().reciprocal();
  }

  // Ordering comparisons are not defined for complex numbers: each of the
  // following unconditionally fails its TORCH_CHECK.
  Vectorized<T> lt(const Vectorized<T>& other) const {
    TORCH_CHECK(false, "not supported for complex numbers");
  }

  Vectorized<T> le(const Vectorized<T>& other) const {
    TORCH_CHECK(false, "not supported for complex numbers");
  }

  Vectorized<T> gt(const Vectorized<T>& other) const {
    TORCH_CHECK(false, "not supported for complex numbers");
  }

  Vectorized<T> ge(const Vectorized<T>& other) const {
    TORCH_CHECK(false, "not supported for complex numbers");
  }
};

Analyze Your Own Codebase

Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.

Try Supermodel Free