diff --git a/lsfg-vk-common/include/lsfg-vk-common/vulkan/vulkan.hpp b/lsfg-vk-common/include/lsfg-vk-common/vulkan/vulkan.hpp index 40510f2..9e7fff4 100644 --- a/lsfg-vk-common/include/lsfg-vk-common/vulkan/vulkan.hpp +++ b/lsfg-vk-common/include/lsfg-vk-common/vulkan/vulkan.hpp @@ -15,6 +15,38 @@ #include #include +// Compatibility shim for VK_KHR_present_wait2 / VK_KHR_present_id2 (missing from older SDK headers) +#ifndef VK_KHR_present_wait2 +#define VK_KHR_present_wait2 1 +#define VK_STRUCTURE_TYPE_PRESENT_WAIT_2_INFO_KHR ((VkStructureType)1000480002) +#define VK_STRUCTURE_TYPE_PRESENT_ID_2_KHR ((VkStructureType)1000479001) + +typedef struct VkPresentWait2InfoKHR { + VkStructureType sType; + const void* pNext; + uint64_t presentId; + uint64_t timeout; +} VkPresentWait2InfoKHR; + +typedef struct VkPresentId2KHR { + VkStructureType sType; + const void* pNext; + uint32_t swapchainCount; + const uint64_t* pPresentIds; +} VkPresentId2KHR; + +typedef VkResult (VKAPI_PTR *PFN_vkWaitForPresent2KHR)(VkDevice device, VkSwapchainKHR swapchain, const VkPresentWait2InfoKHR* pPresentWaitInfo); +#endif + +// Compatibility shim for VK_KHR_swapchain_maintenance1 (uses EXT naming in older SDKs) +#ifndef VK_KHR_swapchain_maintenance1 +#define VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODE_INFO_KHR VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODE_INFO_EXT +#define VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_FENCE_INFO_KHR VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_FENCE_INFO_EXT +typedef VkSwapchainPresentModeInfoEXT VkSwapchainPresentModeInfoKHR; +typedef VkSwapchainPresentFenceInfoEXT VkSwapchainPresentFenceInfoKHR; +typedef VkReleaseSwapchainImagesInfoEXT VkReleaseSwapchainImagesInfoKHR; +#endif + namespace vk { /// vulkan instance function pointers diff --git a/lsfg-vk-common/src/vulkan/command_buffer.cpp b/lsfg-vk-common/src/vulkan/command_buffer.cpp index a3baee5..af8c0ec 100644 --- a/lsfg-vk-common/src/vulkan/command_buffer.cpp +++ b/lsfg-vk-common/src/vulkan/command_buffer.cpp @@ -210,13 +210,15 
@@ void CommandBuffer::submit(const vk::Vulkan& vk, waitSemaphores.push_back(waitTimelineSemaphore); std::vector waitValues(waitSemaphores.size(), 0); - waitValues.back() = waitValue; + if (!waitValues.empty()) + waitValues.back() = waitValue; if (signalTimelineSemaphore) signalSemaphores.push_back(signalTimelineSemaphore); std::vector signalValues(signalSemaphores.size(), 0); - signalValues.back() = signalValue; + if (!signalValues.empty()) + signalValues.back() = signalValue; // create submit info const VkTimelineSemaphoreSubmitInfo timelineInfo{ diff --git a/lsfg-vk-layer/src/entrypoint.cpp b/lsfg-vk-layer/src/entrypoint.cpp index feb856d..b5ecef4 100644 --- a/lsfg-vk-layer/src/entrypoint.cpp +++ b/lsfg-vk-layer/src/entrypoint.cpp @@ -176,6 +176,13 @@ namespace { } try { + // Get physical device name for backend GPU selection + VkPhysicalDeviceProperties2 props{ + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2 + }; + myvk_instance.funcs().GetPhysicalDeviceProperties2(physdev, &props); + myvk_layer.setActiveGpu(props.properties.deviceName); + auto myvk_device = std::make_unique(myvk_layer, myvk_instance, physdev, *info, layer_info->GetDeviceProcAddr, setLoaderData, diff --git a/lsfg-vk-layer/src/hooks/device.cpp b/lsfg-vk-layer/src/hooks/device.cpp index 0b6ccb6..600ce07 100644 --- a/lsfg-vk-layer/src/hooks/device.cpp +++ b/lsfg-vk-layer/src/hooks/device.cpp @@ -64,6 +64,7 @@ MyVkDevice::MyVkDevice(MyVkLayer& layer, MyVkInstance& instance, info.ppEnabledExtensionNames, info.enabledExtensionCount, { + "VK_KHR_swapchain", "VK_KHR_external_memory", "VK_KHR_external_memory_fd", "VK_KHR_external_semaphore", diff --git a/lsfg-vk-layer/src/hooks/layer.cpp b/lsfg-vk-layer/src/hooks/layer.cpp index 17c028d..d6613b5 100644 --- a/lsfg-vk-layer/src/hooks/layer.cpp +++ b/lsfg-vk-layer/src/hooks/layer.cpp @@ -72,18 +72,23 @@ backend::Instance& MyVkLayer::backend() { else dll = ls::findShaderDll(); + // Use profile.gpu if set, otherwise use the active GPU from layer 
device + const std::optional gpuFilter = profile.gpu.has_value() + ? profile.gpu + : (this->active_gpu.empty() ? std::nullopt : std::optional(this->active_gpu)); + this->backend_instance.emplace( - [gpu = profile.gpu]( + [gpuFilter]( const std::string& deviceName, std::pair ids, const std::optional& pci ) { - if (!gpu) + if (!gpuFilter) return true; - return (deviceName == *gpu) - || (ids.first + ":" + ids.second == *gpu) - || (pci && *pci == *gpu); + return (deviceName == *gpuFilter) + || (ids.first + ":" + ids.second == *gpuFilter) + || (pci && *pci == *gpuFilter); }, dll, global.allow_fp16 ); diff --git a/lsfg-vk-layer/src/hooks/layer.hpp b/lsfg-vk-layer/src/hooks/layer.hpp index 4495e8b..3abb03a 100644 --- a/lsfg-vk-layer/src/hooks/layer.hpp +++ b/lsfg-vk-layer/src/hooks/layer.hpp @@ -6,6 +6,8 @@ #include "lsfg-vk-common/configuration/config.hpp" #include "lsfg-vk-common/helpers/pointers.hpp" +#include + #include #include @@ -38,6 +40,10 @@ namespace lsfgvk::layer { /// @throws ls::error if an error occured during backend creation [[nodiscard]] backend::Instance& backend(); + /// set the active GPU name for backend device selection + /// @param name the GPU device name + void setActiveGpu(const std::string& name) { this->active_gpu = name; } + // non-moveable, non-copyable MyVkLayer(const MyVkLayer&) = delete; MyVkLayer& operator=(const MyVkLayer&) = delete; @@ -48,6 +54,7 @@ namespace lsfgvk::layer { ls::WatchedConfig config; std::optional current_profile; + std::string active_gpu; ls::lazy backend_instance; }; diff --git a/lsfg-vk-layer/src/hooks/swapchain.cpp b/lsfg-vk-layer/src/hooks/swapchain.cpp index 406fccd..c6243e2 100644 --- a/lsfg-vk-layer/src/hooks/swapchain.cpp +++ b/lsfg-vk-layer/src/hooks/swapchain.cpp @@ -72,6 +72,10 @@ MyVkSwapchain::MyVkSwapchain(MyVkLayer& layer, MyVkInstance& instance, MyVkDevic this->handle = createFunc(&info); this->swapchainImages = getSwapchainImages(vk, this->handle); + // store for reinitialize + this->extent = 
info.imageExtent; + this->format = info.imageFormat; + // create virtual swapchain images this->images.reserve(this->swapchainImages.size()); this->availableImages = std::vector(this->swapchainImages.size(), true); @@ -86,16 +90,21 @@ MyVkSwapchain::MyVkSwapchain(MyVkLayer& layer, MyVkInstance& instance, MyVkDevic ); } + // create frame generator + this->generator = std::make_unique(layer, device, this->extent, this->format); + // create thread this->doneSemaphore.emplace(vk, 0); this->thread = std::thread(&MyVkSwapchain::thread_main, this); - - // this->reinitialize(); } -// void MyVkSwapchain::reinitialize() { -// // ... -// } +void MyVkSwapchain::reinitialize() { + // recreate the generator with potentially new profile settings + this->generator = std::make_unique( + this->layer.get(), this->device.get(), + this->extent, this->format + ); +} MyVkSwapchain::~MyVkSwapchain() noexcept { this->running.store(false); @@ -130,9 +139,12 @@ void MyVkSwapchain::thread_main() noexcept { vk::Semaphore presentSemaphore; }; + // allocate enough passes for generated frames + original frame + const size_t generatedCount = this->generator->count(); + const size_t passCount = (this->swapchainImages.size() + 1) * (generatedCount + 1); std::vector passes; - passes.reserve(this->swapchainImages.size() + 1); - for (size_t i = 0; i < this->swapchainImages.size() + 1; i++) { + passes.reserve(passCount); + for (size_t i = 0; i < passCount; i++) { passes.emplace_back(Pass { .acquireSemaphore = vk::Semaphore(vk), .commandBuffer = vk::CommandBuffer(vk), @@ -141,24 +153,82 @@ void MyVkSwapchain::thread_main() noexcept { }); } - try { // FIXME: indentation and stuff - + try { + size_t passIdx{0}; uint64_t counter{1}; while (this->running.load()) { // wait for present signal and fetch the image index - const auto ppi = this->virtual_FetchUPresent(100'1000, counter); + const auto ppi = this->virtual_FetchUPresent(100'000, counter); if (!ppi.has_value()) continue; // timeout after 100us - // 
acquire a real swapchain image - const auto& pass = passes[counter % passes.size()]; + auto& virtualImage = this->images.at(ppi->idx); + + // 1. PREPARE: Copy virtual image to backend source for frame generation + if (generatedCount > 0) { + auto& preparePass = passes[passIdx++ % passes.size()]; + auto& prepareCmdbuf = preparePass.commandBuffer; + + prepareCmdbuf.begin(vk); + const auto [prepareSem, prepareVal] = this->generator->prepare( + prepareCmdbuf, virtualImage.handle()); + prepareCmdbuf.end(vk); + + { + const std::scoped_lock lock(offload.mutex); + prepareCmdbuf.submit(vk, + {}, VK_NULL_HANDLE, 0, + {}, prepareSem, prepareVal, + preparePass.copyFence.handle(), offload.queue + ); + } + + // 2. SCHEDULE: Trigger backend frame generation + this->generator->schedule(); + + // wait for prepare to finish before generating frames + if (!preparePass.copyFence.wait(vk, UINT64_MAX)) + throw ls::error("prepare fence wait timed out"); + preparePass.copyFence.reset(vk); + + // 3. 
GENERATED FRAMES: Present each generated frame + for (size_t frame = 0; frame < generatedCount; frame++) { + auto& genPass = passes[passIdx++ % passes.size()]; + const uint32_t gen_idx = this->virtual_AcquireNext(genPass.acquireSemaphore); + + auto& genCmdbuf = genPass.commandBuffer; + genCmdbuf.begin(vk); + const auto [obtainSem, obtainVal] = this->generator->obtain( + genCmdbuf, + this->swapchainImages.at(gen_idx)); + genCmdbuf.end(vk); + + { + const std::scoped_lock lock(offload.mutex); + genCmdbuf.submit(vk, + { genPass.acquireSemaphore.handle() }, obtainSem, obtainVal, + { genPass.presentSemaphore.handle() }, VK_NULL_HANDLE, 0, + genPass.copyFence.handle(), offload.queue + ); + } + + // present the generated frame + this->virtual_PresentGenerated(genPass.presentSemaphore, gen_idx); + + // wait for copy completion + if (!genPass.copyFence.wait(vk, UINT64_MAX)) + throw ls::error("generated frame copy fence wait timed out"); + genPass.copyFence.reset(vk); + } + } + + // 4. 
ORIGINAL FRAME: Acquire real swapchain image and copy virtual -> real + const auto& pass = passes[passIdx++ % passes.size()]; const uint32_t real_idx = this->virtual_AcquireNext(pass.acquireSemaphore); - // copy virtual image into real swapchain image const auto& cmdbuf = pass.commandBuffer; cmdbuf.begin(vk); - auto& virtualImage = this->images.at(ppi->idx); auto& swapchainImage = this->swapchainImages.at(real_idx); cmdbuf.blitImage(vk, @@ -205,7 +275,7 @@ void MyVkSwapchain::thread_main() noexcept { ); } - // present the real swapchain image + // present the original frame (linked to app's present call) this->virtual_PresentLinked(*ppi, pass.presentSemaphore, real_idx); // wait for the copy to finish @@ -213,7 +283,7 @@ void MyVkSwapchain::thread_main() noexcept { throw ls::error("virtual swapchain copy fence wait timed out"); pass.copyFence.reset(vk); - // mark image as available again + // mark virtual image as available again this->virtual_CompleteUPresent(*ppi); } @@ -320,6 +390,42 @@ void MyVkSwapchain::virtual_PresentLinked(const MyVkPresentInfo& original_info, this->doneSemaphore->signal(vk, presentId + 1); } +void MyVkSwapchain::virtual_PresentGenerated(const vk::Semaphore& semaphore, uint32_t idx) { + const auto& vk = this->device.get().vkd(); + + // use FIFO for proper frame pacing of generated frames + const VkPresentModeKHR mode = VK_PRESENT_MODE_FIFO_KHR; + const VkSwapchainPresentModeInfoKHR presentModeInfo{ + .sType = VK_STRUCTURE_TYPE_SWAPCHAIN_PRESENT_MODE_INFO_KHR, + .swapchainCount = 1, + .pPresentModes = &mode + }; + + const VkPresentInfoKHR presentInfo{ + .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, + .pNext = &presentModeInfo, + .waitSemaphoreCount = 1, + .pWaitSemaphores = &semaphore.handle(), + .swapchainCount = 1, + .pSwapchains = &this->handle, + .pImageIndices = &idx, + }; + { + auto& offload = this->device.get().offload(); + + const std::scoped_lock lock(offload.mutex); + const std::scoped_lock lock2(this->swapchainMutex); + + auto res 
= vk.df().QueuePresentKHR(offload.queue, &presentInfo); + if (res != VK_SUCCESS) { + this->status.store(res); + + if (res != VK_SUBOPTIMAL_KHR) + throw ls::error("vkQueuePresentKHR() failed for generated frame"); + } + } +} + void MyVkSwapchain::virtual_CompleteUPresent(const MyVkPresentInfo& info) { const auto& vk = this->device.get().vkd(); diff --git a/lsfg-vk-layer/src/hooks/swapchain.hpp b/lsfg-vk-layer/src/hooks/swapchain.hpp index 9f70f68..c68839b 100644 --- a/lsfg-vk-layer/src/hooks/swapchain.hpp +++ b/lsfg-vk-layer/src/hooks/swapchain.hpp @@ -4,6 +4,7 @@ #include "device.hpp" #include "instance.hpp" +#include "../generator.hpp" #include "lsfg-vk-common/helpers/pointers.hpp" #include "lsfg-vk-common/vulkan/image.hpp" #include "lsfg-vk-common/vulkan/semaphore.hpp" @@ -12,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -105,11 +107,20 @@ namespace lsfgvk::layer { /// mark a present from the underlying swapchain as complete /// @param info present information void virtual_CompleteUPresent(const MyVkPresentInfo& info); + + /// present a generated frame to the real swapchain + /// @param semaphore semaphore to wait on before presenting + /// @param idx index of the real swapchain image to present + void virtual_PresentGenerated(const vk::Semaphore& semaphore, uint32_t idx); private: ls::R layer; ls::R instance; ls::R device; + std::unique_ptr generator; + VkExtent2D extent; + VkFormat format; + vk::TimelineSemaphore presentSemaphore; uint64_t presentIndex;