CUDAEventPool Class — pytorch Architecture
Architecture documentation for the CUDAEventPool class in CUDAEvent.h from the pytorch codebase.
Entity Profile
Source Code
c10/cuda/CUDAEvent.h lines 293–367
class CUDAEventPool {
public:
using Event = std::unique_ptr<
c10::cuda::CUDAEvent,
std::function<void(c10::cuda::CUDAEvent*)>>;
CUDAEventPool(size_t init_num_events = 0)
: pools_(c10::cuda::device_count()) {
if (init_num_events > 0) {
reserve_events_on_pools(init_num_events);
}
}
// Acquire an event associated with a given device. If device is invalid, fall
// back to a regular CUDAEvent and no pooling.
Event get(const DeviceIndex device) {
if (device < 0 || device >= (DeviceIndex)pools_.size()) {
auto deleter = [](CUDAEvent* event) { delete event; };
return Event(std::make_unique<CUDAEvent>().release(), deleter);
}
auto& pool = pools_[device];
// Create a destructor that returns the event to the appropriate device pool
auto destructor = [&pool](CUDAEvent* event) noexcept {
if (event != nullptr) {
std::lock_guard<std::mutex> lock(pool.mutex_);
pool.event_pool_.emplace_back(event);
}
};
{
std::lock_guard<std::mutex> lock(pool.mutex_);
if (!pool.event_pool_.empty()) {
auto event = std::move(pool.event_pool_.back());
pool.event_pool_.pop_back();
return Event(event.release(), destructor);
}
}
// Pool is empty then create a new Event
return Event(std::make_unique<CUDAEvent>().release(), destructor);
}
void empty_cache() {
for (auto& pool : pools_) {
std::lock_guard<std::mutex> lock(pool.mutex_);
pool.event_pool_.clear();
}
}
private:
// Pre-initialize each device pool with N events. This prevents
// cudaEventCreate() from invoking during steady-state execution.
void reserve_events_on_pools(size_t num_events) {
for (const auto device : c10::irange(pools_.size())) {
std::vector<Event> temp_events;
temp_events.reserve(num_events);
pools_[device].event_pool_.reserve(num_events);
for ([[maybe_unused]] const auto _ : c10::irange(num_events)) {
auto event = get(device);
event->create(device);
temp_events.emplace_back(std::move(event));
}
// Events will be returned to pool when temp_events is destroyed.
}
}
struct alignas(c10::hardware_destructive_interference_size) PerDevicePool {
alignas(c10::hardware_destructive_interference_size) std::mutex mutex_;
std::vector<std::unique_ptr<CUDAEvent>> event_pool_;
};
std::vector<PerDevicePool> pools_;
};
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free