apply_syevd_batched_rocsolver Function — pytorch Architecture
Architecture documentation for the apply_syevd_batched_rocsolver function template in BatchLinearAlgebraLib.cpp from the pytorch codebase.
Entity Profile
Source Code
aten/src/ATen/native/cuda/linalg/BatchLinearAlgebraLib.cpp lines 1269–1315
template <typename scalar_t>
static void apply_syevd_batched_rocsolver(const Tensor& values, const Tensor& vectors, const Tensor& infos, bool upper, bool compute_eigenvectors) {
  // Batched symmetric/Hermitian eigendecomposition via rocSOLVER's
  // strided-batched syevd. Eigenvalues land in `values`, eigenvectors
  // overwrite `vectors` in place (when requested), and per-matrix solver
  // status codes are written to `infos`.
  //
  // value_t is the real scalar underlying scalar_t (e.g. double for
  // c10::complex<double>) — eigenvalues of a Hermitian matrix are real.
  using value_t = typename c10::scalar_value_type<scalar_t>::type;

  // Map the boolean options onto the rocSOLVER enums.
  const auto fill_mode = upper ? rocblas_fill::rocblas_fill_upper : rocblas_fill::rocblas_fill_lower;
  const auto evect_mode = compute_eigenvectors ? rocblas_evect::rocblas_evect_original : rocblas_evect::rocblas_evect_none;

  const int64_t n = vectors.size(-1);
  const int64_t lda = std::max<int64_t>(1, n);  // keep lda valid even for n == 0
  const int64_t num_matrices = batchCount(vectors);
  const auto matrix_stride = matrixStride(vectors);
  const auto eigvals_stride = n;  // one contiguous length-n eigenvalue row per matrix

  auto* const matrix_ptr = vectors.data_ptr<scalar_t>();
  auto* const eigvals_ptr = values.data_ptr<value_t>();
  auto* const info_ptr = infos.data_ptr<int>();

  // Per-matrix scratch array E of n entries, allocated on the device.
  // sizeof(scalar_t) >= sizeof(value_t), so this is always large enough.
  const auto scratch_stride = n;
  const auto scratch_elems = scratch_stride * num_matrices;
  auto& allocator = *at::cuda::getCUDADeviceAllocator();
  auto scratch = allocator.allocate(sizeof(scalar_t) * scratch_elems);

  rocblas_handle handle = static_cast<rocblas_handle>(at::cuda::getCurrentCUDASolverDnHandle());

  // rocsolver will manage the workspace size automatically
  if (!rocblas_is_managing_device_memory(handle)) {
    TORCH_ROCBLAS_CHECK(rocblas_set_workspace(handle, nullptr, 0));
  }

  TORCH_ROCBLAS_CHECK(_rocsolver_syevd_strided_batched<scalar_t>(
      handle,
      evect_mode,
      fill_mode,
      n,
      matrix_ptr,
      lda,
      matrix_stride,
      eigvals_ptr,
      eigvals_stride,
      static_cast<scalar_t*>(scratch.get()),
      scratch_stride,
      info_ptr,
      num_matrices));
}
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free