feat(bindless): Build optimal memory allocations for pipelines

This commit is contained in:
PancakeTAS 2026-04-25 20:12:09 +02:00
parent 310f53e373
commit f3d7f1fcea
No known key found for this signature in database
4 changed files with 215 additions and 2 deletions

View file

@ -13,6 +13,7 @@
#include <cstddef>
#include <cstdint>
#include <ios>
#include <numeric>
#include <stdexcept>
#include <unordered_map>
#include <utility>
@ -228,5 +229,128 @@ Pipeline::Pipeline(
LOG_DEBUG(" Created " << this->m_images.size() << " images with common alignment "
<< alignment << " and memory type bits " << std::hex << types << std::dec)
// Fill in image sizes with respect to alignment
for (auto& image : this->m_images) {
if (image.signature.flags & (ImageFlag::ExternalInput | ImageFlag::ExternalOutput))
continue; // External inputs and outputs have dedicated allocations
for (const auto& subimage : image.subimages) {
image.size += vkhelper::align(subimage.memory.size, alignment);
}
}
// Calculate optimal-ish allocations using heuristics & greedy fit strategy
std::vector<size_t> images(signature.images.size());
std::iota(images.begin(), images.end(), 0);
std::ranges::sort(images, [&](const auto& a, const auto& b) {
return this->m_images.at(a).size > this->m_images.at(b).size;
});
std::vector<size_t> placements;
for (const auto& imageIdx : images) {
const auto& image{this->m_images.at(imageIdx)};
if (image.signature.flags & (ImageFlag::ExternalInput | ImageFlag::ExternalOutput))
continue;
auto& allocation{
(image.signature.flags & ImageFlag::Pinned)
? this->m_allocations.at(1)
: this->m_allocations.at(0)
};
auto& segment{allocation.segments.emplace_back()};
vk::DeviceSize size{};
for (const auto& subimage : image.subimages) {
const vk::DeviceSize alignedSize{vkhelper::align(subimage.memory.size, alignment)};
segment.subsegments.push_back({
.size = alignedSize,
.offset = size
});
size += alignedSize;
}
if (image.signature.flags & ImageFlag::Pinned) {
segment = {
.imageIdx = imageIdx,
.subsegments = segment.subsegments,
.size = size,
.offset = allocation.size,
};
allocation.size += size;
} else {
const auto lifetime{image.signature.lifetime};
vk::DeviceSize offset{};
for (const auto& otherSegmentIdx : placements) {
const auto& otherSegment{allocation.segments.at(otherSegmentIdx)};
if (otherSegment.imageIdx == imageIdx)
continue; // Skip self
const auto& otherImage{this->m_images.at(otherSegment.imageIdx)};
const auto& otherLifetime{otherImage.signature.lifetime};
if (lifetime.first > otherLifetime.second ||
lifetime.second < otherLifetime.first)
continue; // Skip horizontally non-overlapping
if (offset >= (otherSegment.offset + otherSegment.size) ||
otherSegment.offset >= (offset + size))
continue; // Skip vertically non-overlapping
offset = otherSegment.offset + otherSegment.size;
}
allocation.size = std::max(allocation.size, offset + size);
segment = {
.imageIdx = imageIdx,
.subsegments = segment.subsegments,
.size = size,
.offset = offset,
};
const size_t i{allocation.segments.size() - 1};
auto it{std::ranges::upper_bound(placements, i,
[&](const auto& a, const auto& b) {
return allocation.segments.at(a).offset < allocation.segments.at(b).offset;
}
)};
placements.insert(it, i);
}
}
LOG_DEBUG(" Computed " << this->m_allocations.size() << " memory allocations")
// Allocate the memory & bind the images
for (auto& allocation : this->m_allocations) {
allocation.memory = vkhelper::allocateMemory(
dld,
device,
physdev,
allocation.size,
types
);
for (const auto& segment : allocation.segments) {
const auto& image{this->m_images.at(segment.imageIdx)};
for (size_t i = 0; i < image.subimages.size(); i++) {
const auto& subsegment{segment.subsegments.at(i)};
const auto& subimage{image.subimages.at(i)};
device.bindImageMemory(
*subimage.image,
*allocation.memory,
segment.offset + subsegment.offset,
dld
);
}
}
LOG_DEBUG(" Allocated memory of size " << allocation.size << " for "
<< allocation.segments.size() << " segments")
}
LOG_DEBUG("Finished building pipeline")
}

View file

@ -7,6 +7,7 @@
#include "pipeline/signature/image.hpp"
#include "utility/vkhelper.hpp"
#include <array>
#include <cstddef>
#include <cstdint>
#include <unordered_map>
@ -122,6 +123,27 @@ namespace lsfgvk::pipeline {
std::vector<ExternalImage> m_externalInputs;
std::vector<ExternalImage> m_externalOutputs;
/// Placement of one sub-image inside its parent memory segment
struct MemorySubSegment {
    /// Alignment-padded size reserved for the sub-image
    vk::DeviceSize size = 0;
    /// Start of the sub-image, measured from the beginning of the segment
    vk::DeviceSize offset = 0;
};
/// Region of an allocation that backs every sub-image of a single image
struct MemorySegment {
    /// Index of the backed image in the pipeline's image list
    size_t imageIdx = 0;
    /// One entry per sub-image, in sub-image order
    std::vector<MemorySubSegment> subsegments{};
    /// Sum of all sub-segment sizes
    vk::DeviceSize size = 0;
    /// Start of the segment, measured from the beginning of the allocation
    vk::DeviceSize offset = 0;
};
/// A device memory allocation together with the layout of the images placed in it
struct AllocationInfo {
    /// Owning handle of the device memory (empty until the memory is allocated)
    vk::UniqueDeviceMemory memory{};
    /// Segments placed inside this allocation
    std::vector<MemorySegment> segments{};
    /// Total size required to hold every segment
    vk::DeviceSize size = 0;
};
std::array<AllocationInfo, 2> m_allocations;
std::unordered_map<size_t, vk::UniqueDeviceMemory> m_externalAllocations;
};

View file

@ -230,6 +230,51 @@ vk::UniqueImage vkhelper::createImage(
return device.createImageUnique(imageInfo, nullptr, dld);
}
/* Memory allocations */
/// Allocate device memory from one of the memory types permitted by `types`.
///
/// Candidates are scanned in index order. The first candidate that is
/// device-local wins; if no candidate is device-local, the last candidate
/// seen is used. When `hostVisible` is set, only types that are both
/// host-visible and host-coherent are considered candidates.
///
/// Throws std::runtime_error when no candidate exists.
vk::UniqueDeviceMemory vkhelper::allocateMemory(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::PhysicalDevice& physdev,
    size_t size,
    std::bitset<32> types,
    bool hostVisible
) {
    const auto deviceProps{physdev.getMemoryProperties2(dld)};
    const auto& table = deviceProps.memoryProperties;
    const uint32_t typeCount = table.memoryTypeCount;

    // Walk the permitted memory types and remember the best candidate so far
    std::optional<uint32_t> chosen;
    for (uint32_t idx = 0; idx < typeCount; ++idx) {
        if (!types.test(idx)) {
            continue; // Not allowed by the caller's type mask
        }

        const vk::MemoryPropertyFlags flags = table.memoryTypes.at(idx).propertyFlags;
        const bool visible = static_cast<bool>(flags & vk::MemoryPropertyFlagBits::eHostVisible);
        const bool coherent = static_cast<bool>(flags & vk::MemoryPropertyFlagBits::eHostCoherent);
        if (hostVisible && !(visible && coherent)) {
            continue; // Caller needs mappable, coherent memory
        }

        // Every later candidate replaces the previous one; a device-local
        // candidate ends the search immediately.
        chosen = idx;
        if (flags & vk::MemoryPropertyFlagBits::eDeviceLocal) {
            break;
        }
    }

    if (!chosen.has_value()) {
        throw std::runtime_error("No suitable memory type found for allocation");
    }

    // Perform the allocation with the selected memory type
    vk::MemoryAllocateInfo allocInfo{};
    allocInfo.allocationSize = size;
    allocInfo.memoryTypeIndex = chosen.value();
    return device.allocateMemoryUnique(allocInfo, nullptr, dld);
}
/* External memory */
std::pair<vk::UniqueImage, vk::UniqueDeviceMemory> vkhelper::createExternalImage(

View file

@ -16,6 +16,7 @@
#include <vulkan/vulkan_structs.hpp>
// IWYU pragma: end_exports
#include <bitset>
#include <cstddef>
#include <cstdint>
#include <span>
@ -158,6 +159,29 @@ namespace vkhelper {
vk::ImageUsageFlags usage
);
/* Memory allocations */
///
/// Create a Vulkan memory allocation
///
/// The memory type is chosen from the types enabled in @p types: the first
/// device-local type is preferred; if none of the permitted types is
/// device-local, the last permitted type is used instead.
///
/// @param dld Dynamic dispatch loader
/// @param device Vulkan device
/// @param physdev Physical device (queried for its memory properties)
/// @param size Allocation size
/// @param types Valid memory type bits (bit i set = memory type i may be used)
/// @param hostVisible Require memory that is both host visible and host coherent
/// @return RAII-wrapped Vulkan device memory
/// @throws std::runtime_error if no permitted memory type satisfies the request
///         (NOTE(review): errors from the allocation call itself presumably
///         surface as Vulkan-Hpp exceptions - confirm against build config)
///
vk::UniqueDeviceMemory allocateMemory(
const vk::detail::DispatchLoaderDynamic& dld,
const vk::Device& device,
const vk::PhysicalDevice& physdev,
size_t size,
std::bitset<32> types,
bool hostVisible = false
);
///
/// Align a memory allocation
///
@ -169,8 +193,6 @@ namespace vkhelper {
return (size + align - 1) & ~(align - 1);
}
/* External memory */
///
/// Create a Vulkan image with a fd-exportable dedicated allocation
///