DefaultMobileCPUAllocator Class (template parameters PreGuardBytes, PostGuardBytes) — pytorch Architecture

Architecture documentation for the DefaultMobileCPUAllocator class template, which is parameterized by PreGuardBytes and PostGuardBytes, in CPUAllocator.cpp from the pytorch codebase.

Class cpp

Entity Profile

Source Code

c10/core/CPUAllocator.cpp lines 75–155

// CPU allocator that pads each allocation with PreGuardBytes bytes in front
// of, and PostGuardBytes bytes behind, the region handed back to the caller.
//
// Both allocation and deallocation pick their backend in the same order:
//   1. the thread-local caching allocator, if one is set;
//   2. otherwise the thread-local profiling allocator, if one is set;
//   3. otherwise c10::alloc_cpu / c10::free_cpu.
template <uint32_t PreGuardBytes, uint32_t PostGuardBytes>
class DefaultMobileCPUAllocator final : public at::Allocator {
 public:
  // Releases memory previously obtained from allocate().
  //
  // `pointer` is passed unchanged to the selected backend's free routine, so
  // it is presumably the unpadded base address that allocate() stores as the
  // DataPtr context -- TODO confirm against DataPtr's deleter contract.
  // A null `pointer` is a no-op.
  static void deleter(void* const pointer) {
    if (C10_UNLIKELY(!pointer)) {
      return;
    }
    // TODO: enable with better TLS support on mobile
    // profiledCPUMemoryReporter().Delete(pointer);
    auto allocator_ptr = GetThreadLocalCachingAllocator();
    auto profiling_allocator_ptr = GetThreadLocalProfilingAllocator();
    if (allocator_ptr != nullptr) {
      allocator_ptr->free(pointer);
    } else if (profiling_allocator_ptr != nullptr) {
      profiling_allocator_ptr->free(pointer);
    } else {
      c10::free_cpu(pointer);
      // This adds extra cost to freeing memory to the default case when
      // caching allocator is not enabled.
      // NOTE(review): `pointer` is handed to the record_free calls below after
      // free_cpu has run; presumably only its address value is used for
      // bookkeeping -- confirm.
      // NOLINTNEXTLINE(clang-analyzer-unix.Malloc)
      CPUCachingAllocator::record_free(pointer);
      auto allocation_planner = GetThreadLocalAllocationPlanner();
      if (allocation_planner != nullptr) {
        allocation_planner->record_free(pointer);
      }
    }
  }

  // Allocates `nbytes` usable bytes surrounded by the guard regions.
  //
  // Returns a DataPtr whose first member is the base allocation advanced by
  // PreGuardBytes and whose second member is the base allocation itself
  // (is_simple_data_ptr() compares these via get() and get_context()). The
  // DataPtr carries `deleter` and a CPU device. For `nbytes == 0` both
  // pointers are null and nothing is allocated.
  //
  // If c10::alloc_cpu throws a c10::Error, the failure is reported through
  // profiledCPUMemoryReporter().OutOfMemory() and the original exception is
  // rethrown.
  DataPtr allocate(const size_t nbytes) override {
    if (C10_UNLIKELY(0u == nbytes)) {
      return {
          nullptr,
          nullptr,
          &deleter,
          at::Device(DeviceType::CPU),
      };
    }

    // NOTE(review): this sum is computed with no overflow check.
    auto alloc_size = PreGuardBytes + nbytes + PostGuardBytes;
    void* data = nullptr;
    auto allocator_ptr = GetThreadLocalCachingAllocator();
    auto profiling_allocator_ptr = GetThreadLocalProfilingAllocator();
    if (allocator_ptr != nullptr) {
      data = allocator_ptr->allocate(alloc_size);
    } else if (profiling_allocator_ptr != nullptr) {
      data = profiling_allocator_ptr->allocate(alloc_size);
    } else {
      try {
        data = c10::alloc_cpu(alloc_size);
      } catch (const c10::Error&) {
        profiledCPUMemoryReporter().OutOfMemory(alloc_size);
        // Bare `throw;` rethrows the in-flight exception object itself.
        // `throw e;` would copy-construct a new c10::Error and slice off any
        // more-derived exception type.
        throw;
      }
      auto allocation_planner = GetThreadLocalAllocationPlanner();
      if (allocation_planner != nullptr) {
        allocation_planner->record_allocation(alloc_size, data);
      }
    }
    // Reported for every backend, using the padded size.
    profiledCPUMemoryReporter().New(data, alloc_size);
    return {
        reinterpret_cast<uint8_t*>(data) + PreGuardBytes,
        data,
        &deleter,
        at::Device(DeviceType::CPU),
    };
  }

  // Returns the function used to free memory produced by this allocator.
  DeleterFnPtr raw_deleter() const override {
    return deleter;
  }

  // True when `data_ptr`'s data pointer sits exactly PreGuardBytes past its
  // context pointer, i.e. it has the layout that allocate() produces for a
  // non-empty allocation.
  bool is_simple_data_ptr(const c10::DataPtr& data_ptr) const final {
    return reinterpret_cast<const uint8_t*>(data_ptr.get()) ==
        reinterpret_cast<const uint8_t*>(data_ptr.get_context()) +
        PreGuardBytes;
  }

  // Copies `count` bytes from `src` to `dest` by delegating to
  // default_copy_data.
  void copy_data(void* dest, const void* src, std::size_t count) const final {
    default_copy_data(dest, src, count);
  }
};

Source

View on GitHub

Analyze Your Own Codebase

Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.

Try Supermodel Free