feat(frame-pacing): integrate Generator into virtual swapchain

Integrate frame generation into the virtual swapchain's presentation thread. The thread now copies virtual images to backend sources, schedules frame generation, and presents generated frames before the original frame.

Key changes:
- Add Generator to MyVkSwapchain for frame generation
- Implement frame generation loop in thread_main()
- Add virtual_PresentGenerated() helper for generated frame presentation
- Add VK_KHR_swapchain to required device extensions

Bug fixes:
- Fix backend GPU selection on multi-GPU systems by passing the layer's active GPU name to the backend device picker. This ensures DMA-BUF memory sharing works correctly between layer and backend.
- Fix crash in CommandBuffer::submit() when waitSemaphores or signalSemaphores is empty by adding an empty-vector check before accessing .back()
- Add VK_KHR_present_wait2 and VK_KHR_swapchain_maintenance1 compatibility shims for older Vulkan SDKs
2026-02-02 03:45:55 +00:00 · 2026-01-09 22:47:51 -08:00 · 2026-01-09 22:47:51 -08:00 · 980fa68bec
commit 980fa68bec
parent 18a39ce5e5
8 changed files with 194 additions and 23 deletions
--- a/lsfg-vk-common/include/lsfg-vk-common/vulkan/vulkan.hpp
+++ b/lsfg-vk-common/include/lsfg-vk-common/vulkan/vulkan.hpp
@ -15,6 +15,38 @@
 #include <vulkan/vulkan_core.h>
 #include <vulkan/vk_layer.h>

+// Compatibility shim for VK_KHR_present_wait2 (and the VkPresentId2KHR structure from
+// VK_KHR_present_id2) for Vulkan headers that predate these extensions.
+// Structure type values follow the registry rule
+//   1000000000 + (extension number - 1) * 1000 + offset
+// with VK_KHR_present_id2 being extension 480 and VK_KHR_present_wait2 extension 481.
+#ifndef VK_KHR_present_wait2
+#define VK_KHR_present_wait2 1
+#define VK_STRUCTURE_TYPE_PRESENT_WAIT_2_INFO_KHR ((VkStructureType)1000480002)
+#define VK_STRUCTURE_TYPE_PRESENT_ID_2_KHR ((VkStructureType)1000479001)
+
+// parameters of vkWaitForPresent2KHR
+typedef struct VkPresentWait2InfoKHR {
+    VkStructureType    sType;     // VK_STRUCTURE_TYPE_PRESENT_WAIT_2_INFO_KHR
+    const void*        pNext;
+    uint64_t           presentId; // present id to wait for
+    uint64_t           timeout;   // timeout in nanoseconds
+} VkPresentWait2InfoKHR;
+
+// chained into VkPresentInfoKHR to attach a present id to each presented swapchain
+typedef struct VkPresentId2KHR {
+    VkStructureType    sType;          // VK_STRUCTURE_TYPE_PRESENT_ID_2_KHR
+    const void*        pNext;
+    uint32_t           swapchainCount; // number of entries in pPresentIds
+    const uint64_t*    pPresentIds;    // one present id per swapchain
+} VkPresentId2KHR;
+
+typedef VkResult (VKAPI_PTR *PFN_vkWaitForPresent2KHR)(VkDevice device, VkSwapchainKHR swapchain, const VkPresentWait2InfoKHR* pPresentWaitInfo);
+#endif
+
+// Compatibility shim for VK_KHR_swapchain_maintenance1 (older SDKs only ship the EXT
+// naming). The guard tests the extension's own macro: the VK_STRUCTURE_TYPE_* names are
+// enumerators, which the preprocessor cannot see, so they cannot be used with #ifndef.
+#ifndef VK_KHR_swapchain_maintenance1
+#define VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODE_INFO_KHR VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODE_INFO_EXT
+#define VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_FENCE_INFO_KHR VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_FENCE_INFO_EXT
+typedef VkSwapchainPresentModeInfoEXT VkSwapchainPresentModeInfoKHR;
+typedef VkSwapchainPresentFenceInfoEXT VkSwapchainPresentFenceInfoKHR;
+typedef VkReleaseSwapchainImagesInfoEXT VkReleaseSwapchainImagesInfoKHR;
+#endif
+
 namespace vk {

    /// vulkan instance function pointers
--- a/lsfg-vk-common/src/vulkan/command_buffer.cpp
+++ b/lsfg-vk-common/src/vulkan/command_buffer.cpp
@ -210,13 +210,15 @@ void CommandBuffer::submit(const vk::Vulkan& vk,
        waitSemaphores.push_back(waitTimelineSemaphore);

    std::vector<uint64_t> waitValues(waitSemaphores.size(), 0);
-    waitValues.back() = waitValue;
+    if (!waitValues.empty())
+        waitValues.back() = waitValue;

    if (signalTimelineSemaphore)
        signalSemaphores.push_back(signalTimelineSemaphore);

    std::vector<uint64_t> signalValues(signalSemaphores.size(), 0);
-    signalValues.back() = signalValue;
+    if (!signalValues.empty())
+        signalValues.back() = signalValue;

    // create submit info
    const VkTimelineSemaphoreSubmitInfo timelineInfo{
--- a/lsfg-vk-layer/src/entrypoint.cpp
+++ b/lsfg-vk-layer/src/entrypoint.cpp
@ -176,6 +176,13 @@ namespace {
        }

        try {
+            // Get physical device name for backend GPU selection
+            VkPhysicalDeviceProperties2 props{
+                .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2
+            };
+            myvk_instance.funcs().GetPhysicalDeviceProperties2(physdev, &props);
+            myvk_layer.setActiveGpu(props.properties.deviceName);
+
            auto myvk_device = std::make_unique<MyVkDevice>(myvk_layer, myvk_instance,
                physdev, *info,
                layer_info->GetDeviceProcAddr, setLoaderData,
--- a/lsfg-vk-layer/src/hooks/device.cpp
+++ b/lsfg-vk-layer/src/hooks/device.cpp
@ -64,6 +64,7 @@ MyVkDevice::MyVkDevice(MyVkLayer& layer, MyVkInstance& instance,
        info.ppEnabledExtensionNames,
        info.enabledExtensionCount,
        {
+            "VK_KHR_swapchain",
            "VK_KHR_external_memory",
            "VK_KHR_external_memory_fd",
            "VK_KHR_external_semaphore",
--- a/lsfg-vk-layer/src/hooks/layer.cpp
+++ b/lsfg-vk-layer/src/hooks/layer.cpp
@ -72,18 +72,23 @@ backend::Instance& MyVkLayer::backend() {
        else
            dll = ls::findShaderDll();

+        // Use profile.gpu if set, otherwise use the active GPU from layer device
+        const std::optional<std::string> gpuFilter = profile.gpu.has_value()
+            ? profile.gpu
+            : (this->active_gpu.empty() ? std::nullopt : std::optional<std::string>(this->active_gpu));
+
        this->backend_instance.emplace(
-            [gpu = profile.gpu](
+            [gpuFilter](
                const std::string& deviceName,
                std::pair<const std::string&, const std::string&> ids,
                const std::optional<std::string>& pci
            ) {
-                if (!gpu)
+                if (!gpuFilter)
                    return true;

-                return (deviceName == *gpu)
-                    || (ids.first + ":" + ids.second == *gpu)
-                    || (pci && *pci == *gpu);
+                return (deviceName == *gpuFilter)
+                    || (ids.first + ":" + ids.second == *gpuFilter)
+                    || (pci && *pci == *gpuFilter);
            },
            dll, global.allow_fp16
        );
--- a/lsfg-vk-layer/src/hooks/layer.hpp
+++ b/lsfg-vk-layer/src/hooks/layer.hpp
@ -6,6 +6,8 @@
 #include "lsfg-vk-common/configuration/config.hpp"
 #include "lsfg-vk-common/helpers/pointers.hpp"

+#include <string>
+
 #include <vulkan/vk_layer.h>
 #include <vulkan/vulkan_core.h>

@ -38,6 +40,10 @@ namespace lsfgvk::layer {
        /// @throws ls::error if an error occured during backend creation
        [[nodiscard]] backend::Instance& backend();

+        /// set the active GPU name for backend device selection; the name is
+        /// copied into this layer object, replacing any previously stored name
+        /// @param name the GPU device name
+        void setActiveGpu(const std::string& name) { this->active_gpu = name; }
+
        // non-moveable, non-copyable
        MyVkLayer(const MyVkLayer&) = delete;
        MyVkLayer& operator=(const MyVkLayer&) = delete;
@ -48,6 +54,7 @@ namespace lsfgvk::layer {
        ls::WatchedConfig config;
        std::optional<ls::GameConf> current_profile;

+        std::string active_gpu;
        ls::lazy<backend::Instance> backend_instance;
    };

--- a/lsfg-vk-layer/src/hooks/swapchain.cpp
+++ b/lsfg-vk-layer/src/hooks/swapchain.cpp
@ -72,6 +72,10 @@ MyVkSwapchain::MyVkSwapchain(MyVkLayer& layer, MyVkInstance& instance, MyVkDevic
    this->handle = createFunc(&info);
    this->swapchainImages = getSwapchainImages(vk, this->handle);

+    // store for reinitialize
+    this->extent = info.imageExtent;
+    this->format = info.imageFormat;
+
    // create virtual swapchain images
    this->images.reserve(this->swapchainImages.size());
    this->availableImages = std::vector<bool>(this->swapchainImages.size(), true);
@ -86,16 +90,21 @@ MyVkSwapchain::MyVkSwapchain(MyVkLayer& layer, MyVkInstance& instance, MyVkDevic
        );
    }

+    // create frame generator
+    this->generator = std::make_unique<Generator>(layer, device, this->extent, this->format);
+
    // create thread
    this->doneSemaphore.emplace(vk, 0);
    this->thread = std::thread(&MyVkSwapchain::thread_main, this);
-
-    // this->reinitialize();
 }

-// void MyVkSwapchain::reinitialize() {
-//     // ...
-// }
+/// rebuild the frame generator from the stored layer, device, extent and format,
+/// so that potentially new profile settings are picked up
+void MyVkSwapchain::reinitialize() {
+    auto&& owningLayer = this->layer.get();
+    auto&& owningDevice = this->device.get();
+
+    // the replacement is fully constructed before the previous generator is released
+    this->generator = std::make_unique<Generator>(
+        owningLayer, owningDevice, this->extent, this->format);
+}

 MyVkSwapchain::~MyVkSwapchain() noexcept {
    this->running.store(false);
@ -130,9 +139,12 @@ void MyVkSwapchain::thread_main() noexcept {
        vk::Semaphore presentSemaphore;
    };

+    // allocate enough passes for generated frames + original frame
+    const size_t generatedCount = this->generator->count();
+    const size_t passCount = (this->swapchainImages.size() + 1) * (generatedCount + 1);
    std::vector<Pass> passes;
-    passes.reserve(this->swapchainImages.size() + 1);
-    for (size_t i = 0; i < this->swapchainImages.size() + 1; i++) {
+    passes.reserve(passCount);
+    for (size_t i = 0; i < passCount; i++) {
        passes.emplace_back(Pass {
            .acquireSemaphore = vk::Semaphore(vk),
            .commandBuffer = vk::CommandBuffer(vk),
@ -141,24 +153,82 @@ void MyVkSwapchain::thread_main() noexcept {
        });
    }

-    try { // FIXME: indentation and stuff
-
+    try {
+    size_t passIdx{0};
    uint64_t counter{1};
    while (this->running.load()) {
        // wait for present signal and fetch the image index
-        const auto ppi = this->virtual_FetchUPresent(100'1000, counter);
+        const auto ppi = this->virtual_FetchUPresent(100'000, counter);
        if (!ppi.has_value())
            continue; // timeout after 100us

-        // acquire a real swapchain image
-        const auto& pass = passes[counter % passes.size()];
+        auto& virtualImage = this->images.at(ppi->idx);
+
+        // 1. PREPARE: Copy virtual image to backend source for frame generation
+        if (generatedCount > 0) {
+            const auto& preparePass = passes[passIdx++ % passes.size()];
+            const auto& prepareCmdbuf = preparePass.commandBuffer;
+
+            prepareCmdbuf.begin(vk);
+            const auto [prepareSem, prepareVal] = this->generator->prepare(
+                const_cast<vk::CommandBuffer&>(prepareCmdbuf), virtualImage.handle());
+            prepareCmdbuf.end(vk);
+
+            {
+                const std::scoped_lock<std::mutex> lock(offload.mutex);
+                prepareCmdbuf.submit(vk,
+                    {}, VK_NULL_HANDLE, 0,
+                    {}, prepareSem, prepareVal,
+                    preparePass.copyFence.handle(), offload.queue
+                );
+            }
+
+            // 2. SCHEDULE: Trigger backend frame generation
+            this->generator->schedule();
+
+            // wait for prepare to finish before generating frames
+            if (!preparePass.copyFence.wait(vk, UINT64_MAX))
+                throw ls::error("prepare fence wait timed out");
+            preparePass.copyFence.reset(vk);
+
+            // 3. GENERATED FRAMES: Present each generated frame
+            for (size_t frame = 0; frame < generatedCount; frame++) {
+                const auto& genPass = passes[passIdx++ % passes.size()];
+                const uint32_t gen_idx = this->virtual_AcquireNext(genPass.acquireSemaphore);
+
+                const auto& genCmdbuf = genPass.commandBuffer;
+                genCmdbuf.begin(vk);
+                const auto [obtainSem, obtainVal] = this->generator->obtain(
+                    const_cast<vk::CommandBuffer&>(genCmdbuf),
+                    this->swapchainImages.at(gen_idx));
+                genCmdbuf.end(vk);
+
+                {
+                    const std::scoped_lock<std::mutex> lock(offload.mutex);
+                    genCmdbuf.submit(vk,
+                        { genPass.acquireSemaphore.handle() }, obtainSem, obtainVal,
+                        { genPass.presentSemaphore.handle() }, VK_NULL_HANDLE, 0,
+                        genPass.copyFence.handle(), offload.queue
+                    );
+                }
+
+                // present the generated frame
+                this->virtual_PresentGenerated(genPass.presentSemaphore, gen_idx);
+
+                // wait for copy completion
+                if (!genPass.copyFence.wait(vk, UINT64_MAX))
+                    throw ls::error("generated frame copy fence wait timed out");
+                genPass.copyFence.reset(vk);
+            }
+        }
+
+        // 4. ORIGINAL FRAME: Acquire real swapchain image and copy virtual -> real
+        const auto& pass = passes[passIdx++ % passes.size()];
        const uint32_t real_idx = this->virtual_AcquireNext(pass.acquireSemaphore);

-        // copy virtual image into real swapchain image
        const auto& cmdbuf = pass.commandBuffer;
        cmdbuf.begin(vk);

-        auto& virtualImage = this->images.at(ppi->idx);
        auto& swapchainImage = this->swapchainImages.at(real_idx);

        cmdbuf.blitImage(vk,
@ -205,7 +275,7 @@ void MyVkSwapchain::thread_main() noexcept {
            );
        }

-        // present the real swapchain image
+        // present the original frame (linked to app's present call)
        this->virtual_PresentLinked(*ppi, pass.presentSemaphore, real_idx);

        // wait for the copy to finish
@ -213,7 +283,7 @@ void MyVkSwapchain::thread_main() noexcept {
            throw ls::error("virtual swapchain copy fence wait timed out");
        pass.copyFence.reset(vk);

-        // mark image as available again
+        // mark virtual image as available again
        this->virtual_CompleteUPresent(*ppi);
    }

@ -320,6 +390,42 @@ void MyVkSwapchain::virtual_PresentLinked(const MyVkPresentInfo& original_info,
        this->doneSemaphore->signal(vk, presentId + 1);
 }

+/// present a generated frame to the real swapchain on the offload queue,
+/// requesting FIFO presentation for this present
+/// @param semaphore semaphore the present waits on
+/// @param idx index of the real swapchain image to present
+/// @throws ls::error if vkQueuePresentKHR() returns anything other than
+///         VK_SUCCESS or VK_SUBOPTIMAL_KHR
+void MyVkSwapchain::virtual_PresentGenerated(const vk::Semaphore& semaphore, uint32_t idx) {
+    const auto& vk = this->device.get().vkd();
+
+    // generated frames request FIFO for proper frame pacing
+    const VkPresentModeKHR fifoMode = VK_PRESENT_MODE_FIFO_KHR;
+    const VkSwapchainPresentModeInfoKHR modeInfo{
+        .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODE_INFO_KHR,
+        .swapchainCount = 1,
+        .pPresentModes = &fifoMode
+    };
+
+    const VkPresentInfoKHR info{
+        .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
+        .pNext = &modeInfo,
+        .waitSemaphoreCount = 1,
+        .pWaitSemaphores = &semaphore.handle(),
+        .swapchainCount = 1,
+        .pSwapchains = &this->handle,
+        .pImageIndices = &idx,
+    };
+
+    auto& offload = this->device.get().offload();
+
+    // the offload mutex is taken first, then the swapchain mutex;
+    // both are held until this function returns or throws
+    const std::scoped_lock<std::mutex> queueGuard(offload.mutex);
+    const std::scoped_lock<std::mutex> swapchainGuard(this->swapchainMutex);
+
+    const auto result = vk.df().QueuePresentKHR(offload.queue, &info);
+    if (result == VK_SUCCESS)
+        return;
+
+    // any non-success result is recorded; only suboptimal is tolerated
+    this->status.store(result);
+    if (result != VK_SUBOPTIMAL_KHR)
+        throw ls::error("vkQueuePresentKHR() failed for generated frame");
+}
+
 void MyVkSwapchain::virtual_CompleteUPresent(const MyVkPresentInfo& info) {
    const auto& vk = this->device.get().vkd();

--- a/lsfg-vk-layer/src/hooks/swapchain.hpp
+++ b/lsfg-vk-layer/src/hooks/swapchain.hpp
@ -4,6 +4,7 @@

 #include "device.hpp"
 #include "instance.hpp"
+#include "../generator.hpp"
 #include "lsfg-vk-common/helpers/pointers.hpp"
 #include "lsfg-vk-common/vulkan/image.hpp"
 #include "lsfg-vk-common/vulkan/semaphore.hpp"
@ -12,6 +13,7 @@
 #include <atomic>
 #include <cstdint>
 #include <functional>
+#include <memory>
 #include <mutex>
 #include <optional>
 #include <queue>
@ -105,11 +107,20 @@ namespace lsfgvk::layer {
        /// mark a present from the underlying swapchain as complete
        /// @param info present information
        void virtual_CompleteUPresent(const MyVkPresentInfo& info);
+
+        /// present a generated frame to the real swapchain
+        /// @param semaphore semaphore to wait on before presenting
+        /// @param idx index of the real swapchain image to present
+        void virtual_PresentGenerated(const vk::Semaphore& semaphore, uint32_t idx);
    private:
        ls::R<MyVkLayer> layer;
        ls::R<MyVkInstance> instance;
        ls::R<MyVkDevice> device;

+        std::unique_ptr<Generator> generator;
+        VkExtent2D extent;
+        VkFormat format;
+
        vk::TimelineSemaphore presentSemaphore;
        uint64_t presentIndex;