mirror of
https://github.com/PancakeTAS/lsfg-vk.git
synced 2026-05-10 19:21:42 +00:00
feat(bindless): Build optimal memory allocations for pipelines
This commit is contained in:
parent
310f53e373
commit
f3d7f1fcea
4 changed files with 215 additions and 2 deletions
|
|
@ -13,6 +13,7 @@
|
|||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <ios>
|
||||
#include <numeric>
|
||||
#include <stdexcept>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
|
@ -228,5 +229,128 @@ Pipeline::Pipeline(
|
|||
LOG_DEBUG(" Created " << this->m_images.size() << " images with common alignment "
|
||||
<< alignment << " and memory type bits " << std::hex << types << std::dec)
|
||||
|
||||
// Fill in image sizes with respect to alignment
|
||||
for (auto& image : this->m_images) {
|
||||
if (image.signature.flags & (ImageFlag::ExternalInput | ImageFlag::ExternalOutput))
|
||||
continue; // External inputs have dedicated allocations
|
||||
|
||||
for (const auto& subimage : image.subimages) {
|
||||
image.size += vkhelper::align(subimage.memory.size, alignment);
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate optimal-ish allocations using heuristics & greedy fit strategy
|
||||
std::vector<size_t> images(signature.images.size());
|
||||
std::iota(images.begin(), images.end(), 0);
|
||||
|
||||
std::ranges::sort(images, [&](const auto& a, const auto& b) {
|
||||
return this->m_images.at(a).size > this->m_images.at(b).size;
|
||||
});
|
||||
|
||||
std::vector<size_t> placements;
|
||||
for (const auto& imageIdx : images) {
|
||||
const auto& image{this->m_images.at(imageIdx)};
|
||||
if (image.signature.flags & (ImageFlag::ExternalInput | ImageFlag::ExternalOutput))
|
||||
continue;
|
||||
|
||||
auto& allocation{
|
||||
(image.signature.flags & ImageFlag::Pinned)
|
||||
? this->m_allocations.at(1)
|
||||
: this->m_allocations.at(0)
|
||||
};
|
||||
auto& segment{allocation.segments.emplace_back()};
|
||||
|
||||
vk::DeviceSize size{};
|
||||
for (const auto& subimage : image.subimages) {
|
||||
const vk::DeviceSize alignedSize{vkhelper::align(subimage.memory.size, alignment)};
|
||||
segment.subsegments.push_back({
|
||||
.size = alignedSize,
|
||||
.offset = size
|
||||
});
|
||||
|
||||
size += alignedSize;
|
||||
}
|
||||
|
||||
if (image.signature.flags & ImageFlag::Pinned) {
|
||||
segment = {
|
||||
.imageIdx = imageIdx,
|
||||
.subsegments = segment.subsegments,
|
||||
.size = size,
|
||||
.offset = allocation.size,
|
||||
};
|
||||
allocation.size += size;
|
||||
} else {
|
||||
const auto lifetime{image.signature.lifetime};
|
||||
|
||||
vk::DeviceSize offset{};
|
||||
for (const auto& otherSegmentIdx : placements) {
|
||||
const auto& otherSegment{allocation.segments.at(otherSegmentIdx)};
|
||||
if (otherSegment.imageIdx == imageIdx)
|
||||
continue; // Skip self
|
||||
|
||||
const auto& otherImage{this->m_images.at(otherSegment.imageIdx)};
|
||||
const auto& otherLifetime{otherImage.signature.lifetime};
|
||||
|
||||
if (lifetime.first > otherLifetime.second ||
|
||||
lifetime.second < otherLifetime.first)
|
||||
continue; // Skip horizontally non-overlapping
|
||||
|
||||
if (offset >= (otherSegment.offset + otherSegment.size) ||
|
||||
otherSegment.offset >= (offset + size))
|
||||
continue; // Skip vertically non-overlapping
|
||||
|
||||
offset = otherSegment.offset + otherSegment.size;
|
||||
}
|
||||
|
||||
allocation.size = std::max(allocation.size, offset + size);
|
||||
segment = {
|
||||
.imageIdx = imageIdx,
|
||||
.subsegments = segment.subsegments,
|
||||
.size = size,
|
||||
.offset = offset,
|
||||
};
|
||||
|
||||
const size_t i{allocation.segments.size() - 1};
|
||||
auto it{std::ranges::upper_bound(placements, i,
|
||||
[&](const auto& a, const auto& b) {
|
||||
return allocation.segments.at(a).offset < allocation.segments.at(b).offset;
|
||||
}
|
||||
)};
|
||||
placements.insert(it, i);
|
||||
}
|
||||
}
|
||||
|
||||
LOG_DEBUG(" Computed " << this->m_allocations.size() << " memory allocations")
|
||||
|
||||
// Allocate the memory & bind the images
|
||||
for (auto& allocation : this->m_allocations) {
|
||||
allocation.memory = vkhelper::allocateMemory(
|
||||
dld,
|
||||
device,
|
||||
physdev,
|
||||
allocation.size,
|
||||
types
|
||||
);
|
||||
|
||||
for (const auto& segment : allocation.segments) {
|
||||
const auto& image{this->m_images.at(segment.imageIdx)};
|
||||
|
||||
for (size_t i = 0; i < image.subimages.size(); i++) {
|
||||
const auto& subsegment{segment.subsegments.at(i)};
|
||||
const auto& subimage{image.subimages.at(i)};
|
||||
|
||||
device.bindImageMemory(
|
||||
*subimage.image,
|
||||
*allocation.memory,
|
||||
segment.offset + subsegment.offset,
|
||||
dld
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
LOG_DEBUG(" Allocated memory of size " << allocation.size << " for "
|
||||
<< allocation.segments.size() << " segments")
|
||||
}
|
||||
|
||||
LOG_DEBUG("Finished building pipeline")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
#include "pipeline/signature/image.hpp"
|
||||
#include "utility/vkhelper.hpp"
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <unordered_map>
|
||||
|
|
@ -122,6 +123,27 @@ namespace lsfgvk::pipeline {
|
|||
std::vector<ExternalImage> m_externalInputs;
|
||||
std::vector<ExternalImage> m_externalOutputs;
|
||||
|
||||
/// Memory allocation sub-segment
|
||||
struct MemorySubSegment {
|
||||
vk::DeviceSize size{};
|
||||
vk::DeviceSize offset{}; // Offset in memory segment
|
||||
};
|
||||
|
||||
/// Memory allocation segment
|
||||
struct MemorySegment {
|
||||
size_t imageIdx{};
|
||||
std::vector<MemorySubSegment> subsegments;
|
||||
vk::DeviceSize size{};
|
||||
vk::DeviceSize offset{}; // Offset in allocation
|
||||
};
|
||||
|
||||
/// Memory allocation info
|
||||
struct AllocationInfo {
|
||||
vk::UniqueDeviceMemory memory;
|
||||
std::vector<MemorySegment> segments;
|
||||
vk::DeviceSize size{};
|
||||
};
|
||||
std::array<AllocationInfo, 2> m_allocations;
|
||||
std::unordered_map<size_t, vk::UniqueDeviceMemory> m_externalAllocations;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -230,6 +230,51 @@ vk::UniqueImage vkhelper::createImage(
|
|||
return device.createImageUnique(imageInfo, nullptr, dld);
|
||||
}
|
||||
|
||||
/* Memory allocations */
|
||||
|
||||
///
/// Create a Vulkan memory allocation
///
/// Memory types are examined in index order. The first type that is allowed
/// by `types` (and, if requested, is host-visible and host-coherent) and is
/// also device-local is selected. If no such device-local type exists, the
/// first allowed type is used as a fallback.
///
/// @param dld Dynamic dispatch loader
/// @param device Vulkan device
/// @param physdev Physical device
/// @param size Allocation size
/// @param types Valid memory type bits
/// @param hostVisible Require host-visible and host-coherent memory
/// @return RAII-wrapped Vulkan device memory
/// @throws std::runtime_error if no allowed memory type qualifies
///
vk::UniqueDeviceMemory vkhelper::allocateMemory(
        const vk::detail::DispatchLoaderDynamic& dld,
        const vk::Device& device,
        const vk::PhysicalDevice& physdev,
        size_t size,
        std::bitset<32> types,
        bool hostVisible
) {
    // Find a suitable memory type index
    const auto memProps{physdev.getMemoryProperties2(dld)};

    std::optional<uint32_t> selectedTypeIdx{};
    for (uint32_t i = 0; i < memProps.memoryProperties.memoryTypeCount; i++) {
        if (!types.test(i))
            continue;
        const auto& memType{memProps.memoryProperties.memoryTypes.at(i)};

        const bool isHostVisible{
            memType.propertyFlags & vk::MemoryPropertyFlagBits::eHostVisible &&
            memType.propertyFlags & vk::MemoryPropertyFlagBits::eHostCoherent
        };
        if (hostVisible && !isHostVisible)
            continue;

        // Device-local memory is preferred: take the first match and stop
        if (memType.propertyFlags & vk::MemoryPropertyFlagBits::eDeviceLocal) {
            selectedTypeIdx = i;
            break;
        }

        // Otherwise remember only the FIRST compatible type as the fallback.
        // Vulkan orders memory types so that a first-match search yields the
        // preferred type; later matches must not overwrite it.
        if (!selectedTypeIdx)
            selectedTypeIdx = i;
    }

    if (!selectedTypeIdx)
        throw std::runtime_error("No suitable memory type found for allocation");

    // Allocate memory
    // NOTE(review): `size` is forwarded unchecked; Vulkan requires a non-zero
    // allocationSize - confirm that callers never pass 0.
    const vk::MemoryAllocateInfo allocInfo{
        .allocationSize = size,
        .memoryTypeIndex = *selectedTypeIdx
    };
    return device.allocateMemoryUnique(allocInfo, nullptr, dld);
}
|
||||
|
||||
/* External memory */
|
||||
|
||||
std::pair<vk::UniqueImage, vk::UniqueDeviceMemory> vkhelper::createExternalImage(
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@
|
|||
#include <vulkan/vulkan_structs.hpp>
|
||||
// IWYU pragma: end_exports
|
||||
|
||||
#include <bitset>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <span>
|
||||
|
|
@ -158,6 +159,29 @@ namespace vkhelper {
|
|||
vk::ImageUsageFlags usage
|
||||
);
|
||||
|
||||
/* Memory allocations */
|
||||
|
||||
///
|
||||
/// Create a Vulkan memory allocation
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param device Vulkan device
|
||||
/// @param physdev Physical device
|
||||
/// @param size Allocation size
|
||||
/// @param types Valid memory type bits
|
||||
/// @param hostVisible Require host-visible and host-coherent memory
|
||||
/// @return RAII-wrapped Vulkan device memory
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
vk::UniqueDeviceMemory allocateMemory(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const vk::PhysicalDevice& physdev,
|
||||
size_t size,
|
||||
std::bitset<32> types,
|
||||
bool hostVisible = false
|
||||
);
|
||||
|
||||
///
|
||||
/// Align a memory allocation
|
||||
///
|
||||
|
|
@ -169,8 +193,6 @@ namespace vkhelper {
|
|||
return (size + align - 1) & ~(align - 1);
|
||||
}
|
||||
|
||||
/* External memory */
|
||||
|
||||
///
|
||||
/// Create a Vulkan image with a fd-exportable dedicated allocation
|
||||
///
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue