From 980fa68bec99dfd98d653619e9e49dcbe75605fa Mon Sep 17 00:00:00 2001 From: Anonymous Date: Fri, 9 Jan 2026 22:47:51 -0800 Subject: [PATCH] feat(frame-pacing): integrate Generator into virtual swapchain Integrate frame generation into the virtual swapchain's presentation thread. The thread now copies virtual images to backend sources, schedules frame generation, and presents generated frames before the original frame. Key changes: - Add Generator to MyVkSwapchain for frame generation - Implement frame generation loop in thread_main() - Add virtual_PresentGenerated() helper for generated frame presentation - Add VK_KHR_swapchain to required device extensions Bug fixes: - Fix backend GPU selection on multi-GPU systems by passing the layer's active GPU name to the backend device picker. This ensures DMA-BUF memory sharing works correctly between layer and backend. - Fix crash in CommandBuffer::submit() when waitSemaphores is empty by adding empty vector check before accessing .back() - Add VK_KHR_present_wait2 compatibility shims for older Vulkan SDKs --- .../include/lsfg-vk-common/vulkan/vulkan.hpp | 32 ++++ lsfg-vk-common/src/vulkan/command_buffer.cpp | 6 +- lsfg-vk-layer/src/entrypoint.cpp | 7 + lsfg-vk-layer/src/hooks/device.cpp | 1 + lsfg-vk-layer/src/hooks/layer.cpp | 15 +- lsfg-vk-layer/src/hooks/layer.hpp | 7 + lsfg-vk-layer/src/hooks/swapchain.cpp | 138 ++++++++++++++++-- lsfg-vk-layer/src/hooks/swapchain.hpp | 11 ++ 8 files changed, 194 insertions(+), 23 deletions(-) diff --git a/lsfg-vk-common/include/lsfg-vk-common/vulkan/vulkan.hpp b/lsfg-vk-common/include/lsfg-vk-common/vulkan/vulkan.hpp index 40510f2..9e7fff4 100644 --- a/lsfg-vk-common/include/lsfg-vk-common/vulkan/vulkan.hpp +++ b/lsfg-vk-common/include/lsfg-vk-common/vulkan/vulkan.hpp @@ -15,6 +15,38 @@ #include #include +// Compatibility shim for VK_KHR_present_id2 / VK_KHR_present_wait2 (absent from older Vulkan SDK headers); enum values must match the Vulkan registry +#ifndef VK_KHR_present_wait2 +#define VK_KHR_present_wait2 1 +#define 
VK_STRUCTURE_TYPE_PRESENT_WAIT_2_INFO_KHR ((VkStructureType)1000480002) +#define VK_STRUCTURE_TYPE_PRESENT_ID_2_KHR ((VkStructureType)1000479001) + +typedef struct VkPresentWait2InfoKHR { + VkStructureType sType; + const void* pNext; + uint64_t presentId; + uint64_t timeout; +} VkPresentWait2InfoKHR; + +typedef struct VkPresentId2KHR { + VkStructureType sType; + const void* pNext; + uint32_t swapchainCount; + const uint64_t* pPresentIds; +} VkPresentId2KHR; + +typedef VkResult (VKAPI_PTR *PFN_vkWaitForPresent2KHR)(VkDevice device, VkSwapchainKHR swapchain, const VkPresentWait2InfoKHR* pPresentWaitInfo); +#endif + +// Compatibility shim for VK_KHR_swapchain_maintenance1 (uses EXT naming in older SDKs); guarded by the extension macro because enumerators are invisible to #ifndef +#ifndef VK_KHR_swapchain_maintenance1 +#define VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODE_INFO_KHR VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODE_INFO_EXT +#define VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_FENCE_INFO_KHR VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_FENCE_INFO_EXT +typedef VkSwapchainPresentModeInfoEXT VkSwapchainPresentModeInfoKHR; +typedef VkSwapchainPresentFenceInfoEXT VkSwapchainPresentFenceInfoKHR; +typedef VkReleaseSwapchainImagesInfoEXT VkReleaseSwapchainImagesInfoKHR; +#endif + namespace vk { /// vulkan instance function pointers diff --git a/lsfg-vk-common/src/vulkan/command_buffer.cpp b/lsfg-vk-common/src/vulkan/command_buffer.cpp index a3baee5..af8c0ec 100644 --- a/lsfg-vk-common/src/vulkan/command_buffer.cpp +++ b/lsfg-vk-common/src/vulkan/command_buffer.cpp @@ -210,13 +210,15 @@ void CommandBuffer::submit(const vk::Vulkan& vk, waitSemaphores.push_back(waitTimelineSemaphore); std::vector waitValues(waitSemaphores.size(), 0); - waitValues.back() = waitValue; + if (!waitValues.empty()) + waitValues.back() = waitValue; if (signalTimelineSemaphore) signalSemaphores.push_back(signalTimelineSemaphore); std::vector signalValues(signalSemaphores.size(), 0); - signalValues.back() = signalValue; + if (!signalValues.empty()) + signalValues.back() = 
signalValue; // create submit info const VkTimelineSemaphoreSubmitInfo timelineInfo{ diff --git a/lsfg-vk-layer/src/entrypoint.cpp b/lsfg-vk-layer/src/entrypoint.cpp index feb856d..b5ecef4 100644 --- a/lsfg-vk-layer/src/entrypoint.cpp +++ b/lsfg-vk-layer/src/entrypoint.cpp @@ -176,6 +176,13 @@ namespace { } try { + // Get physical device name for backend GPU selection + VkPhysicalDeviceProperties2 props{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2 + }; + myvk_instance.funcs().GetPhysicalDeviceProperties2(physdev, &props); + myvk_layer.setActiveGpu(props.properties.deviceName); + auto myvk_device = std::make_unique(myvk_layer, myvk_instance, physdev, *info, layer_info->GetDeviceProcAddr, setLoaderData, diff --git a/lsfg-vk-layer/src/hooks/device.cpp b/lsfg-vk-layer/src/hooks/device.cpp index 0b6ccb6..600ce07 100644 --- a/lsfg-vk-layer/src/hooks/device.cpp +++ b/lsfg-vk-layer/src/hooks/device.cpp @@ -64,6 +64,7 @@ MyVkDevice::MyVkDevice(MyVkLayer& layer, MyVkInstance& instance, info.ppEnabledExtensionNames, info.enabledExtensionCount, { + "VK_KHR_swapchain", "VK_KHR_external_memory", "VK_KHR_external_memory_fd", "VK_KHR_external_semaphore", diff --git a/lsfg-vk-layer/src/hooks/layer.cpp b/lsfg-vk-layer/src/hooks/layer.cpp index 17c028d..d6613b5 100644 --- a/lsfg-vk-layer/src/hooks/layer.cpp +++ b/lsfg-vk-layer/src/hooks/layer.cpp @@ -72,18 +72,23 @@ backend::Instance& MyVkLayer::backend() { else dll = ls::findShaderDll(); + // Use profile.gpu if set, otherwise use the active GPU from layer device + const std::optional gpuFilter = profile.gpu.has_value() + ? profile.gpu + : (this->active_gpu.empty() ? 
std::nullopt : std::optional(this->active_gpu)); + this->backend_instance.emplace( - [gpu = profile.gpu]( + [gpuFilter]( const std::string& deviceName, std::pair ids, const std::optional& pci ) { - if (!gpu) + if (!gpuFilter) return true; - return (deviceName == *gpu) - || (ids.first + ":" + ids.second == *gpu) - || (pci && *pci == *gpu); + return (deviceName == *gpuFilter) + || (ids.first + ":" + ids.second == *gpuFilter) + || (pci && *pci == *gpuFilter); }, dll, global.allow_fp16 ); diff --git a/lsfg-vk-layer/src/hooks/layer.hpp b/lsfg-vk-layer/src/hooks/layer.hpp index 4495e8b..3abb03a 100644 --- a/lsfg-vk-layer/src/hooks/layer.hpp +++ b/lsfg-vk-layer/src/hooks/layer.hpp @@ -6,6 +6,8 @@ #include "lsfg-vk-common/configuration/config.hpp" #include "lsfg-vk-common/helpers/pointers.hpp" +#include + #include #include @@ -38,6 +40,10 @@ namespace lsfgvk::layer { /// @throws ls::error if an error occured during backend creation [[nodiscard]] backend::Instance& backend(); + /// set the active GPU name for backend device selection + /// @param name the GPU device name + void setActiveGpu(const std::string& name) { this->active_gpu = name; } + // non-moveable, non-copyable MyVkLayer(const MyVkLayer&) = delete; MyVkLayer& operator=(const MyVkLayer&) = delete; @@ -48,6 +54,7 @@ namespace lsfgvk::layer { ls::WatchedConfig config; std::optional current_profile; + std::string active_gpu; ls::lazy backend_instance; }; diff --git a/lsfg-vk-layer/src/hooks/swapchain.cpp b/lsfg-vk-layer/src/hooks/swapchain.cpp index 406fccd..c6243e2 100644 --- a/lsfg-vk-layer/src/hooks/swapchain.cpp +++ b/lsfg-vk-layer/src/hooks/swapchain.cpp @@ -72,6 +72,10 @@ MyVkSwapchain::MyVkSwapchain(MyVkLayer& layer, MyVkInstance& instance, MyVkDevic this->handle = createFunc(&info); this->swapchainImages = getSwapchainImages(vk, this->handle); + // store for reinitialize + this->extent = info.imageExtent; + this->format = info.imageFormat; + // create virtual swapchain images 
this->images.reserve(this->swapchainImages.size()); this->availableImages = std::vector(this->swapchainImages.size(), true); @@ -86,16 +90,21 @@ MyVkSwapchain::MyVkSwapchain(MyVkLayer& layer, MyVkInstance& instance, MyVkDevic ); } + // create frame generator + this->generator = std::make_unique(layer, device, this->extent, this->format); + // create thread this->doneSemaphore.emplace(vk, 0); this->thread = std::thread(&MyVkSwapchain::thread_main, this); - - // this->reinitialize(); } -// void MyVkSwapchain::reinitialize() { -// // ... -// } +void MyVkSwapchain::reinitialize() { + // recreate the generator with potentially new profile settings. NOTE(review): thread_main() dereferences this->generator without a lock and reads count() only once at startup - confirm this is never called while that thread is running + this->generator = std::make_unique( + this->layer.get(), this->device.get(), + this->extent, this->format + ); +} MyVkSwapchain::~MyVkSwapchain() noexcept { this->running.store(false); @@ -130,9 +139,12 @@ void MyVkSwapchain::thread_main() noexcept { vk::Semaphore presentSemaphore; }; + // allocate enough passes for generated frames + original frame (when frames are generated, each loop iteration also takes one extra pass for the prepare copy) + const size_t generatedCount = this->generator->count(); + const size_t passCount = (this->swapchainImages.size() + 1) * (generatedCount + 1); std::vector passes; - passes.reserve(this->swapchainImages.size() + 1); - for (size_t i = 0; i < this->swapchainImages.size() + 1; i++) { + passes.reserve(passCount); + for (size_t i = 0; i < passCount; i++) { passes.emplace_back(Pass { .acquireSemaphore = vk::Semaphore(vk), .commandBuffer = vk::CommandBuffer(vk), @@ -141,24 +153,82 @@ void MyVkSwapchain::thread_main() noexcept { }); } - try { // FIXME: indentation and stuff - + try { + size_t passIdx{0}; uint64_t counter{1}; while (this->running.load()) { // wait for present signal and fetch the image index - const auto ppi = this->virtual_FetchUPresent(100'1000, counter); + const auto ppi = this->virtual_FetchUPresent(100'000, counter); if (!ppi.has_value()) continue; // timeout after 100us - // acquire a real swapchain image - const auto& pass = passes[counter % passes.size()]; + auto& 
virtualImage = this->images.at(ppi->idx); + + // 1. PREPARE: Copy virtual image to backend source for frame generation + if (generatedCount > 0) { + const auto& preparePass = passes[passIdx++ % passes.size()]; + const auto& prepareCmdbuf = preparePass.commandBuffer; + + prepareCmdbuf.begin(vk); + const auto [prepareSem, prepareVal] = this->generator->prepare( + const_cast(prepareCmdbuf), virtualImage.handle()); + prepareCmdbuf.end(vk); + + { + const std::scoped_lock lock(offload.mutex); + prepareCmdbuf.submit(vk, + {}, VK_NULL_HANDLE, 0, + {}, prepareSem, prepareVal, + preparePass.copyFence.handle(), offload.queue + ); + } + + // 2. SCHEDULE: Trigger backend frame generation + this->generator->schedule(); + + // wait for prepare to finish before generating frames + if (!preparePass.copyFence.wait(vk, UINT64_MAX)) + throw ls::error("prepare fence wait timed out"); + preparePass.copyFence.reset(vk); + + // 3. GENERATED FRAMES: Present each generated frame + for (size_t frame = 0; frame < generatedCount; frame++) { + const auto& genPass = passes[passIdx++ % passes.size()]; + const uint32_t gen_idx = this->virtual_AcquireNext(genPass.acquireSemaphore); + + const auto& genCmdbuf = genPass.commandBuffer; + genCmdbuf.begin(vk); + const auto [obtainSem, obtainVal] = this->generator->obtain( + const_cast(genCmdbuf), + this->swapchainImages.at(gen_idx)); + genCmdbuf.end(vk); + + { + const std::scoped_lock lock(offload.mutex); + genCmdbuf.submit(vk, + { genPass.acquireSemaphore.handle() }, obtainSem, obtainVal, + { genPass.presentSemaphore.handle() }, VK_NULL_HANDLE, 0, + genPass.copyFence.handle(), offload.queue + ); + } + + // present the generated frame + this->virtual_PresentGenerated(genPass.presentSemaphore, gen_idx); + + // wait for copy completion + if (!genPass.copyFence.wait(vk, UINT64_MAX)) + throw ls::error("generated frame copy fence wait timed out"); + genPass.copyFence.reset(vk); + } + } + + // 4. 
ORIGINAL FRAME: Acquire real swapchain image and copy virtual -> real + const auto& pass = passes[passIdx++ % passes.size()]; const uint32_t real_idx = this->virtual_AcquireNext(pass.acquireSemaphore); - // copy virtual image into real swapchain image const auto& cmdbuf = pass.commandBuffer; cmdbuf.begin(vk); - auto& virtualImage = this->images.at(ppi->idx); auto& swapchainImage = this->swapchainImages.at(real_idx); cmdbuf.blitImage(vk, @@ -205,7 +275,7 @@ void MyVkSwapchain::thread_main() noexcept { ); } - // present the real swapchain image + // present the original frame (linked to app's present call) this->virtual_PresentLinked(*ppi, pass.presentSemaphore, real_idx); // wait for the copy to finish @@ -213,7 +283,7 @@ void MyVkSwapchain::thread_main() noexcept { throw ls::error("virtual swapchain copy fence wait timed out"); pass.copyFence.reset(vk); - // mark image as available again + // mark virtual image as available again this->virtual_CompleteUPresent(*ppi); } @@ -320,6 +390,42 @@ void MyVkSwapchain::virtual_PresentLinked(const MyVkPresentInfo& original_info, this->doneSemaphore->signal(vk, presentId + 1); } +void MyVkSwapchain::virtual_PresentGenerated(const vk::Semaphore& semaphore, uint32_t idx) { + const auto& vk = this->device.get().vkd(); + + // use FIFO for proper frame pacing of generated frames + const VkPresentModeKHR mode = VK_PRESENT_MODE_FIFO_KHR; + const VkSwapchainPresentModeInfoKHR presentModeInfo{ + .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODE_INFO_KHR, + .swapchainCount = 1, + .pPresentModes = &mode + }; + + const VkPresentInfoKHR presentInfo{ + .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, + .pNext = &presentModeInfo, + .waitSemaphoreCount = 1, + .pWaitSemaphores = &semaphore.handle(), + .swapchainCount = 1, + .pSwapchains = &this->handle, + .pImageIndices = &idx, + }; + { + auto& offload = this->device.get().offload(); + + const std::scoped_lock lock(offload.mutex); + const std::scoped_lock lock2(this->swapchainMutex); + + auto res 
= vk.df().QueuePresentKHR(offload.queue, &presentInfo); + if (res != VK_SUCCESS) { + this->status.store(res); + + if (res != VK_SUBOPTIMAL_KHR) + throw ls::error("vkQueuePresentKHR() failed for generated frame"); + } + } +} + void MyVkSwapchain::virtual_CompleteUPresent(const MyVkPresentInfo& info) { const auto& vk = this->device.get().vkd(); diff --git a/lsfg-vk-layer/src/hooks/swapchain.hpp b/lsfg-vk-layer/src/hooks/swapchain.hpp index 9f70f68..c68839b 100644 --- a/lsfg-vk-layer/src/hooks/swapchain.hpp +++ b/lsfg-vk-layer/src/hooks/swapchain.hpp @@ -4,6 +4,7 @@ #include "device.hpp" #include "instance.hpp" +#include "../generator.hpp" #include "lsfg-vk-common/helpers/pointers.hpp" #include "lsfg-vk-common/vulkan/image.hpp" #include "lsfg-vk-common/vulkan/semaphore.hpp" @@ -12,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -105,11 +107,20 @@ namespace lsfgvk::layer { /// mark a present from the underlying swapchain as complete /// @param info present information void virtual_CompleteUPresent(const MyVkPresentInfo& info); + + /// present a generated frame to the real swapchain + /// @param semaphore semaphore to wait on before presenting + /// @param idx index of the real swapchain image to present + void virtual_PresentGenerated(const vk::Semaphore& semaphore, uint32_t idx); private: ls::R layer; ls::R instance; ls::R device; + std::unique_ptr generator; + VkExtent2D extent; + VkFormat format; + vk::TimelineSemaphore presentSemaphore; uint64_t presentIndex;