From f3d7f1fceaf250ebb8dd9ad23bad4e343083df9a Mon Sep 17 00:00:00 2001 From: PancakeTAS Date: Sat, 25 Apr 2026 20:12:09 +0200 Subject: [PATCH] feat(bindless): Build optimal memory allocations for pipelines --- lsfg-vk-backend/src/modules/pipeline.cpp | 124 +++++++++++++++++++++++ lsfg-vk-backend/src/modules/pipeline.hpp | 22 ++++ lsfg-vk-backend/src/utility/vkhelper.cpp | 45 ++++++++ lsfg-vk-backend/src/utility/vkhelper.hpp | 26 ++++- 4 files changed, 215 insertions(+), 2 deletions(-) diff --git a/lsfg-vk-backend/src/modules/pipeline.cpp b/lsfg-vk-backend/src/modules/pipeline.cpp index 3eeb6ff..e98ad24 100644 --- a/lsfg-vk-backend/src/modules/pipeline.cpp +++ b/lsfg-vk-backend/src/modules/pipeline.cpp @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -228,5 +229,128 @@ Pipeline::Pipeline( LOG_DEBUG(" Created " << this->m_images.size() << " images with common alignment " << alignment << " and memory type bits " << std::hex << types << std::dec) + // Fill in image sizes in respect to alignment + for (auto& image : this->m_images) { + if (image.signature.flags & (ImageFlag::ExternalInput | ImageFlag::ExternalOutput)) + continue; // External inputs have dedicated allocations + + for (const auto& subimage : image.subimages) { + image.size += vkhelper::align(subimage.memory.size, alignment); + } + } + + // Calculate optimal-ish allocations using heuristics & greedy fit strategy + std::vector images(signature.images.size()); + std::iota(images.begin(), images.end(), 0); + + std::ranges::sort(images, [&](const auto& a, const auto& b) { + return this->m_images.at(a).size > this->m_images.at(b).size; + }); + + std::vector placements; + for (const auto& imageIdx : images) { + const auto& image{this->m_images.at(imageIdx)}; + if (image.signature.flags & (ImageFlag::ExternalInput | ImageFlag::ExternalOutput)) + continue; + + auto& allocation{ + (image.signature.flags & ImageFlag::Pinned) + ? this->m_allocations.at(1) + : this->m_allocations.at(0) + }; + auto& segment{allocation.segments.emplace_back()}; + + vk::DeviceSize size{}; + for (const auto& subimage : image.subimages) { + const vk::DeviceSize alignedSize{vkhelper::align(subimage.memory.size, alignment)}; + segment.subsegments.push_back({ + .size = alignedSize, + .offset = size + }); + + size += alignedSize; + } + + if (image.signature.flags & ImageFlag::Pinned) { + segment = { + .imageIdx = imageIdx, + .subsegments = segment.subsegments, + .size = size, + .offset = allocation.size, + }; + allocation.size += size; + } else { + const auto lifetime{image.signature.lifetime}; + + vk::DeviceSize offset{}; + for (const auto& otherSegmentIdx : placements) { + const auto& otherSegment{allocation.segments.at(otherSegmentIdx)}; + if (otherSegment.imageIdx == imageIdx) + continue; // Skip self + + const auto& otherImage{this->m_images.at(otherSegment.imageIdx)}; + const auto& otherLifetime{otherImage.signature.lifetime}; + + if (lifetime.first > otherLifetime.second || + lifetime.second < otherLifetime.first) + continue; // Skip horizontally non-overlapping + + if (offset >= (otherSegment.offset + otherSegment.size) || + otherSegment.offset >= (offset + size)) + continue; // Skip vertically non-overlapping + + offset = otherSegment.offset + otherSegment.size; + } + + allocation.size = std::max(allocation.size, offset + size); + segment = { + .imageIdx = imageIdx, + .subsegments = segment.subsegments, + .size = size, + .offset = offset, + }; + + const size_t i{allocation.segments.size() - 1}; + auto it{std::ranges::upper_bound(placements, i, + [&](const auto& a, const auto& b) { + return allocation.segments.at(a).offset < allocation.segments.at(b).offset; + } + )}; + placements.insert(it, i); + } + } + + LOG_DEBUG(" Computed " << this->m_allocations.size() << " memory allocations") + + // Allocate the memory & bind the images + for (auto& allocation : this->m_allocations) { + allocation.memory = vkhelper::allocateMemory( + dld, + device, + physdev, + allocation.size, + types + ); + + for (const auto& segment : allocation.segments) { + const auto& image{this->m_images.at(segment.imageIdx)}; + + for (size_t i = 0; i < image.subimages.size(); i++) { + const auto& subsegment{segment.subsegments.at(i)}; + const auto& subimage{image.subimages.at(i)}; + + device.bindImageMemory( + *subimage.image, + *allocation.memory, + segment.offset + subsegment.offset, + dld + ); + } + } + + LOG_DEBUG(" Allocated memory of size " << allocation.size << " for " + << allocation.segments.size() << " segments") + } + LOG_DEBUG("Finished building pipeline") } diff --git a/lsfg-vk-backend/src/modules/pipeline.hpp b/lsfg-vk-backend/src/modules/pipeline.hpp index 32484f6..2d11a0f 100644 --- a/lsfg-vk-backend/src/modules/pipeline.hpp +++ b/lsfg-vk-backend/src/modules/pipeline.hpp @@ -7,6 +7,7 @@ #include "pipeline/signature/image.hpp" #include "utility/vkhelper.hpp" +#include #include #include #include @@ -122,6 +123,27 @@ namespace lsfgvk::pipeline { std::vector m_externalInputs; std::vector m_externalOutputs; + /// Memory allocation sub-segment + struct MemorySubSegment { + vk::DeviceSize size{}; + vk::DeviceSize offset{}; // Offset in memory segment + }; + + /// Memory allocation segment + struct MemorySegment { + size_t imageIdx{}; + std::vector subsegments; + vk::DeviceSize size{}; + vk::DeviceSize offset{}; // Offset in allocation + }; + + /// Memory allocation info + struct AllocationInfo { + vk::UniqueDeviceMemory memory; + std::vector segments; + vk::DeviceSize size{}; + }; + std::array m_allocations; std::unordered_map m_externalAllocations; }; diff --git a/lsfg-vk-backend/src/utility/vkhelper.cpp b/lsfg-vk-backend/src/utility/vkhelper.cpp index cdb08c2..ecd6016 100644 --- a/lsfg-vk-backend/src/utility/vkhelper.cpp +++ b/lsfg-vk-backend/src/utility/vkhelper.cpp @@ -230,6 +230,51 @@ vk::UniqueImage vkhelper::createImage( return device.createImageUnique(imageInfo, nullptr, dld); } +/* Memory allocations */ + +vk::UniqueDeviceMemory vkhelper::allocateMemory( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::PhysicalDevice& physdev, + size_t size, + std::bitset<32> types, + bool hostVisible +) { + // Find a suitable memory type index + const auto memProps{physdev.getMemoryProperties2(dld)}; + + std::optional selectedTypeIdx{}; + for (uint32_t i = 0; i < memProps.memoryProperties.memoryTypeCount; i++) { + if (!types.test(i)) + continue; + const auto& memType{memProps.memoryProperties.memoryTypes.at(i)}; + + const bool isHostVisible{ + memType.propertyFlags & vk::MemoryPropertyFlagBits::eHostVisible && + memType.propertyFlags & vk::MemoryPropertyFlagBits::eHostCoherent + }; + if (hostVisible && !isHostVisible) + continue; + + selectedTypeIdx = i; + + if (memType.propertyFlags & vk::MemoryPropertyFlagBits::eDeviceLocal) + break; + + // Fallback to host-visible memory if no device-local memory is available + } + + if (!selectedTypeIdx) + throw std::runtime_error("No suitable memory type found for allocation"); + + // Allocate memory + const vk::MemoryAllocateInfo allocInfo{ + .allocationSize = size, + .memoryTypeIndex = *selectedTypeIdx + }; + return device.allocateMemoryUnique(allocInfo, nullptr, dld); +} + /* External memory */ std::pair vkhelper::createExternalImage( diff --git a/lsfg-vk-backend/src/utility/vkhelper.hpp b/lsfg-vk-backend/src/utility/vkhelper.hpp index 92a3299..60fcbc8 100644 --- a/lsfg-vk-backend/src/utility/vkhelper.hpp +++ b/lsfg-vk-backend/src/utility/vkhelper.hpp @@ -16,6 +16,7 @@ #include // IWYU pragma: end_exports +#include #include #include #include @@ -158,6 +159,29 @@ namespace vkhelper { vk::ImageUsageFlags usage ); + /* Memory allocations */ + + /// + /// Create a Vulkan memory allocation + /// + /// @param dld Dynamic dispatch loader + /// @param device Vulkan device + /// @param physdev Physical device + /// @param size Allocation size + /// @param types Valid memory type bits + /// @param hostVisible Require host visible memory + /// @return RAII-wrapped Vulkan device memory + /// @throws std::runtime_error on failure + /// + vk::UniqueDeviceMemory allocateMemory( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::PhysicalDevice& physdev, + size_t size, + std::bitset<32> types, + bool hostVisible = false + ); + /// /// Align a memory allocation /// @@ -169,8 +193,6 @@ namespace vkhelper { return (size + align - 1) & ~(align - 1); } - /* External memory */ - /// /// Create a Vulkan image with a fd-exportable dedicated allocation ///