gemm_batched_with_stride Function — pytorch Architecture
Architecture documentation for the gemm_batched_with_stride function template in CPUBlas.cpp from the pytorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/CPUBlas.cpp lines 663–701
// Batched GEMM over tensors laid out with a fixed per-batch stride.
// Dispatch order: a single batch degenerates to one plain gemm(); when MKL
// is compiled in and scalar_t is a BLAS type, the strided layout is expanded
// into per-batch pointer arrays for MKL's batched interface; otherwise the
// generic per-batch fallback is used.
template <typename scalar_t>
void gemm_batched_with_stride(
    TransposeType transa, TransposeType transb,
    int64_t batch_size, int64_t m, int64_t n, int64_t k,
    scalar_t alpha,
    const scalar_t *a, int64_t lda, int64_t batch_stride_a,
    const scalar_t *b, int64_t ldb, int64_t batch_stride_b,
    scalar_t beta,
    scalar_t *c, int64_t ldc, int64_t batch_stride_c) {
  // Fast path: one batch is just an ordinary matrix multiply.
  if (batch_size == 1) {
    return gemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
  }
  if constexpr (AT_MKL_ENABLED() && is_blas_library_type<scalar_t>::value) {
    internal::normalize_last_dims(transa, transb, m, n, k, &lda, &ldb, &ldc);
    if (use_blas_gemm(transa, transb, m, n, k, lda, ldb, ldc)) {
      // MKL's batched entry point wants arrays of per-batch base pointers,
      // so materialize them from the strided layout.
      c10::SmallBuffer<const scalar_t*, 16> a_array(batch_size);
      c10::SmallBuffer<const scalar_t*, 16> b_array(batch_size);
      c10::SmallBuffer<scalar_t*, 16> c_array(batch_size);
      for (const auto i : c10::irange(batch_size)) {
        a_array[i] = a + i * batch_stride_a;
        b_array[i] = b + i * batch_stride_b;
        c_array[i] = c + i * batch_stride_c;
      }
      gemm_batched_mkl_impl(
          transa, transb, batch_size, m, n, k, alpha, a_array.data(), lda,
          b_array.data(), ldb, beta, c_array.data(), ldc);
      return;
    }
    // use_blas_gemm() declined: fall through to the generic path with the
    // normalized leading dimensions, matching the original control flow.
  }
  // Generic fallback (also the only path when MKL is unavailable or
  // scalar_t is not a BLAS library type).
  gemm_batched_with_stride_generic(
      transa, transb, batch_size, m, n, k, alpha, a, lda, batch_stride_a,
      b, ldb, batch_stride_b, beta, c, ldc, batch_stride_c);
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free