EventPool Class — PyTorch Architecture
Architecture documentation for the EventPool class in CUDACachingAllocator.cpp from the PyTorch codebase.
Entity Profile
Source Code
c10/cuda/CUDACachingAllocator.cpp lines 921–967
// Pool of reusable CUDA events, one sub-pool per device. Callers never
// destroy events directly: each handed-out Event carries a custom deleter
// that recycles the event back into its per-device pool.
class EventPool {
public:
// A uniquely-owned cudaEvent_t whose deleter returns it to the
// originating per-device pool instead of destroying it.
using Event = std::unique_ptr<cudaEvent_t, std::function<void(cudaEvent_t*)>>;
// TODO: Explicit device count
EventPool() : pools_(at::cuda::device_count()) {}
// Returns a pooled (or, if the pool is empty, freshly created) event for
// `device`. The returned Event's deleter puts it back into pools_[device].
Event get(c10::DeviceIndex device) {
TORCH_INTERNAL_ASSERT(0 <= device);
TORCH_INTERNAL_ASSERT(device < static_cast<int>(pools_.size()));
auto& pool = pools_[device];
// NOTE(review): the deleter captures `pool` by reference, so every
// outstanding Event relies on this EventPool (and its pools_ vector,
// which is never resized after construction) outliving it — confirm
// callers guarantee that lifetime.
auto destructor = [&pool](cudaEvent_t* event) {
std::lock_guard<std::mutex> g(pool.mutex_);
pool.event_pool_.push_back(std::unique_ptr<cudaEvent_t>(event));
};
// Try to acquire an event from the per-device pool.
{
std::lock_guard<std::mutex> g(pool.mutex_);
if (!pool.event_pool_.empty()) {
// Transfer raw ownership out of the pool's unique_ptr; the custom
// deleter on the returned Event will recycle it later.
auto* event = pool.event_pool_.back().release();
pool.event_pool_.pop_back();
return Event(event, destructor);
}
}
// otherwise, allocate a new event that will be returned to the pool on
// destruction.
auto new_ptr = std::make_unique<cudaEvent_t>();
// cudaEventDisableTiming: no timing data is recorded for these events.
C10_CUDA_CHECK(
cudaEventCreateWithFlags(new_ptr.get(), cudaEventDisableTiming));
return Event(new_ptr.release(), destructor);
}
// Drops all cached events on every device.
// NOTE(review): clear() frees the heap-allocated cudaEvent_t handle
// storage via the default deleter but does not call cudaEventDestroy —
// verify the underlying CUDA events are meant to be reclaimed elsewhere
// (e.g. at context teardown).
void empty_cache() {
for (auto& pool : pools_) {
std::lock_guard<std::mutex> g(pool.mutex_);
pool.event_pool_.clear();
}
}
private:
// One pool per device. The mutex is aligned to the destructive
// interference size to avoid false sharing between adjacent devices'
// locks when multiple threads hit different devices concurrently.
struct PerDevicePool {
alignas(hardware_destructive_interference_size) std::mutex mutex_;
std::vector<std::unique_ptr<cudaEvent_t>> event_pool_;
};
std::vector<PerDevicePool> pools_;
};
Source
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free