apply_triangular_solve Class — pytorch Architecture
Architecture documentation for the apply_triangular_solve class in BatchLinearAlgebraLibBlas.cpp from the pytorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/cuda/linalg/BatchLinearAlgebraLibBlas.cpp lines 160–186
template <typename scalar_t>
static void apply_triangular_solve(const Tensor& A, const Tensor& B, bool left, bool upper, TransposeType transpose, bool unitriangular) {
cublasFillMode_t uplo = upper ? CUBLAS_FILL_MODE_UPPER : CUBLAS_FILL_MODE_LOWER;
const auto trans = to_cublas(transpose);
cublasSideMode_t side = left ? CUBLAS_SIDE_LEFT : CUBLAS_SIDE_RIGHT;
cublasDiagType_t diag = unitriangular ? CUBLAS_DIAG_UNIT : CUBLAS_DIAG_NON_UNIT;
auto A_data = A.data_ptr<scalar_t>();
auto B_data = B.data_ptr<scalar_t>();
auto A_mat_stride = matrixStride(A);
auto B_mat_stride = matrixStride(B);
auto batch_size = batchCount(A);
// This allows to pass rectangular A and B when left = True
auto m = cuda_int_cast(left ? A.size(-1) : B.size(-2), "m");
auto n = cuda_int_cast(B.size(-1), "n");
auto lda = std::max<int>(1, cuda_int_cast(A.size(-2), "lda"));
auto ldb = std::max<int>(1, cuda_int_cast(B.size(-2), "ldb"));
auto alpha = scalar_t{1};
for (decltype(batch_size) i = 0; i < batch_size; i++) {
scalar_t* A_working_ptr = &A_data[i * A_mat_stride];
scalar_t* B_working_ptr = &B_data[i * B_mat_stride];
auto handle = at::cuda::getCurrentCUDABlasHandle();
at::cuda::blas::trsm(handle, side, uplo, trans, diag, m, n, &alpha, A_working_ptr, lda, B_working_ptr, ldb);
}
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free