From 1a9925129becb632e1ff3e1227c10b5120f49197 Mon Sep 17 00:00:00 2001
From: PancakeTAS
Date: Tue, 1 Jul 2025 03:38:11 +0200
Subject: [PATCH] temporal logic for downsample & merge

The Downsample and Merge shaderchains now keep two descriptor sets each,
one per frame parity (fc % 2), and their Dispatch() methods take the
frame count so the matching set is bound. Generator holds the two input
images as inImg_0/inImg_1 and constructs/dispatches both chains.

---
Review notes (ignored by git-am):
 * Merge::Dispatch binds descriptorSets.at(fc % 2). Only two sets are
   created, so indexing with the raw frame count would make
   std::array::at throw std::out_of_range once fc >= 2. This matches
   what Downsample::Dispatch already does.
 * NOTE(review): the Merge @param docs call inImg1 the *prev* image for
   fc % 2 == 0, but Generator passes inImg_0, which lsfg.hpp documents as
   the *next* image for that parity. Confirm which one is intended.
 * Indentation and blank context lines were reconstructed from a
   whitespace-collapsed copy of this patch; verify against the tree
   before applying.

 include/lsfg.hpp                    |  2 +-
 include/shaderchains/downsample.hpp | 12 +++++-----
 include/shaderchains/merge.hpp      | 11 ++++++----
 src/lsfg.cpp                        | 22 +++++++++----------
 src/shaderchains/downsample.cpp     | 34 +++++++++++++++++------------
 src/shaderchains/merge.cpp          | 31 ++++++++++++++------------
 6 files changed, 63 insertions(+), 49 deletions(-)

diff --git a/include/lsfg.hpp b/include/lsfg.hpp
index 1a459e7..ea3459b 100644
--- a/include/lsfg.hpp
+++ b/include/lsfg.hpp
@@ -84,7 +84,7 @@ namespace LSFG {
         Generator& operator=(Generator&&) = default;
         ~Generator() = default;
     private:
-        Core::Image fullFrame0, fullFrame1; // next/prev for fc % 2 == 0
+        Core::Image inImg_0, inImg_1; // inImg_0 is next (inImg_1 prev) when fc % 2 == 0
         uint64_t fc{0};
 
         Shaderchains::Downsample downsampleChain; // FIXME: get rid of default constructors (+ core)
diff --git a/include/shaderchains/downsample.hpp b/include/shaderchains/downsample.hpp
index f799480..fd7e280 100644
--- a/include/shaderchains/downsample.hpp
+++ b/include/shaderchains/downsample.hpp
@@ -28,21 +28,23 @@ namespace LSFG::Shaderchains {
         ///
         /// @param device The Vulkan device to create the resources on.
         /// @param pool The descriptor pool to allocate in.
-        /// @param inImg The input image to downsample.
+        /// @param inImg_0 The next full image to downsample (when fc % 2 == 0)
+        /// @param inImg_1 The next full image to downsample (when fc % 2 == 1)
         ///
         /// @throws LSFG::vulkan_error if resource creation fails.
         ///
         Downsample(const Device& device, const Core::DescriptorPool& pool,
-            Core::Image inImg);
+            Core::Image inImg_0, Core::Image inImg_1);
 
         ///
         /// Dispatch the shaderchain.
         ///
         /// @param buf The command buffer to use for dispatching.
+        /// @param fc The frame count, used to select the input image.
         ///
         /// @throws std::logic_error if the command buffer is not recording.
         ///
-        void Dispatch(const Core::CommandBuffer& buf);
+        void Dispatch(const Core::CommandBuffer& buf, uint64_t fc);
 
         /// Get the output images.
         [[nodiscard]] const auto& getOutImages() const { return this->outImgs; }
@@ -56,10 +58,10 @@ namespace LSFG::Shaderchains {
     private:
         Core::ShaderModule shaderModule;
         Core::Pipeline pipeline;
-        Core::DescriptorSet descriptorSet;
+        std::array<Core::DescriptorSet, 2> descriptorSets; // one for each input image
         Core::Buffer buffer;
 
-        Core::Image inImg;
+        Core::Image inImg_0, inImg_1;
         std::array<Core::Image, 7> outImgs;
     };
 
diff --git a/include/shaderchains/merge.hpp b/include/shaderchains/merge.hpp
index 7ad12da..679522d 100644
--- a/include/shaderchains/merge.hpp
+++ b/include/shaderchains/merge.hpp
@@ -10,6 +10,8 @@
 #include "core/shadermodule.hpp"
 #include "device.hpp"
 
+#include <array>
+
 namespace LSFG::Shaderchains {
 
     ///
@@ -27,8 +29,8 @@ namespace LSFG::Shaderchains {
         ///
         /// @param device The Vulkan device to create the resources on.
         /// @param pool The descriptor pool to use for descriptor sets.
-        /// @param inImg1 The first frame texture
-        /// @param inImg2 The second frame texture
+        /// @param inImg1 The prev full image when fc % 2 == 0
+        /// @param inImg2 The next full image when fc % 2 == 0
         /// @param inImg3 The first related input texture
         /// @param inImg4 The second related input texture
         /// @param inImg5 The third related input texture
@@ -46,10 +48,11 @@ namespace LSFG::Shaderchains {
         /// Dispatch the shaderchain.
         ///
         /// @param buf The command buffer to use for dispatching.
+        /// @param fc The frame count, used to select the input images.
         ///
         /// @throws std::logic_error if the command buffer is not recording.
         ///
-        void Dispatch(const Core::CommandBuffer& buf);
+        void Dispatch(const Core::CommandBuffer& buf, uint64_t fc);
 
         /// Get the output image
         [[nodiscard]] const auto& getOutImage() const { return this->outImg; }
@@ -63,7 +66,7 @@ namespace LSFG::Shaderchains {
     private:
         Core::ShaderModule shaderModule;
         Core::Pipeline pipeline;
-        Core::DescriptorSet descriptorSet;
+        std::array<Core::DescriptorSet, 2> descriptorSets; // one for each input combination
         Core::Buffer buffer;
 
         Core::Image inImg1;
diff --git a/src/lsfg.cpp b/src/lsfg.cpp
index 98b3a7f..130d95c 100644
--- a/src/lsfg.cpp
+++ b/src/lsfg.cpp
@@ -9,8 +9,8 @@ Generator::Generator(const Context& context) {
     // TODO: temporal frames
 
     // create shader chains
-    // this->downsampleChain = Shaderchains::Downsample(context.device, context.descPool,
-    //     this->fullFrame0, this->fullFrame1);
+    this->downsampleChain = Shaderchains::Downsample(context.device, context.descPool,
+        this->inImg_0, this->inImg_1);
     // for (size_t i = 0; i < 7; i++)
     //     this->alphaChains.at(i) = Shaderchains::Alpha(context.device, context.descPool,
     //         this->downsampleChain.getOutImages().at(i), i == 0);
@@ -65,20 +65,20 @@ Generator::Generator(const Context& context) {
     //         );
     //     }
     // }
-    // this->mergeChain = Shaderchains::Merge(context.device, context.descPool,
-    //     this->fullFrame0,
-    //     this->fullFrame1,
-    //     this->zetaChains.at(2).getOutImage(),
-    //     this->epsilonChains.at(2).getOutImage(),
-    //     this->deltaChains.at(2).getOutImage()
-    // );
+    this->mergeChain = Shaderchains::Merge(context.device, context.descPool,
+        this->inImg_0,
+        this->inImg_1,
+        this->zetaChains.at(2).getOutImage(),
+        this->epsilonChains.at(2).getOutImage(),
+        this->deltaChains.at(2).getOutImage()
+    );
 }
 
 void Generator::present(const Context& context) {
     Core::CommandBuffer cmdBuffer(context.device, context.cmdPool);
     cmdBuffer.begin();
 
-    // this->downsampleChain.Dispatch(cmdBuffer, fc);
+    this->downsampleChain.Dispatch(cmdBuffer, fc);
     // for (size_t i = 0; i < 7; i++)
     //     this->alphaChains.at(6 - i).Dispatch(cmdBuffer, fc);
     // this->betaChain.Dispatch(cmdBuffer, fc);
@@ -92,7 +92,7 @@ void Generator::present(const Context& context) {
     //     if (i < 2)
     //         this->extractChains.at(i).Dispatch(cmdBuffer);
     // }
-    // this->mergeChain.Dispatch(cmdBuffer, fc);
+    this->mergeChain.Dispatch(cmdBuffer, fc);
 
     cmdBuffer.end();
 
diff --git a/src/shaderchains/downsample.cpp b/src/shaderchains/downsample.cpp
index 790e210..c60e8bc 100644
--- a/src/shaderchains/downsample.cpp
+++ b/src/shaderchains/downsample.cpp
@@ -4,18 +4,20 @@
 using namespace LSFG::Shaderchains;
 
 Downsample::Downsample(const Device& device, const Core::DescriptorPool& pool,
-        Core::Image inImg)
-        : inImg(std::move(inImg)) {
+        Core::Image inImg_0, Core::Image inImg_1)
+        : inImg_0(std::move(inImg_0)),
+          inImg_1(std::move(inImg_1)) {
     this->shaderModule = Core::ShaderModule(device, "rsc/shaders/downsample.spv",
         { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
           { 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
           { 7, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE },
           { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER } });
     this->pipeline = Core::Pipeline(device, this->shaderModule);
-    this->descriptorSet = Core::DescriptorSet(device, pool, this->shaderModule);
+    for (size_t i = 0; i < 2; i++)
+        this->descriptorSets.at(i) = Core::DescriptorSet(device, pool, this->shaderModule);
     this->buffer = Core::Buffer(device, Globals::fgBuffer, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
 
-    auto extent = this->inImg.getExtent();
+    auto extent = this->inImg_0.getExtent();
     for (size_t i = 0; i < 7; i++)
         this->outImgs.at(i) = Core::Image(device,
             { extent.width >> i, extent.height >> i },
@@ -23,27 +25,31 @@ Downsample::Downsample(const Device& device, const Core::DescriptorPool& pool,
             VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
             VK_IMAGE_ASPECT_COLOR_BIT);
 
-    this->descriptorSet.update(device)
-        .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
-        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg)
-        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs)
-        .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, this->buffer)
-        .build();
+    for (size_t fc = 0; fc < 2; fc++) {
+        auto& inImg = (fc % 2 == 0) ? this->inImg_0 : this->inImg_1;
+        this->descriptorSets.at(fc).update(device)
+            .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
+            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, inImg)
+            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs)
+            .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, this->buffer)
+            .build();
+    }
 }
 
-void Downsample::Dispatch(const Core::CommandBuffer& buf) {
-    auto extent = this->inImg.getExtent();
+void Downsample::Dispatch(const Core::CommandBuffer& buf, uint64_t fc) {
+    auto extent = this->inImg_0.getExtent();
 
     // first pass
     const uint32_t threadsX = (extent.width + 63) >> 6;
     const uint32_t threadsY = (extent.height + 63) >> 6;
 
+    auto& inImg = (fc % 2 == 0) ? this->inImg_0 : this->inImg_1;
     Utils::BarrierBuilder(buf)
-        .addW2R(this->inImg)
+        .addW2R(inImg)
         .addR2W(this->outImgs)
         .build();
 
     this->pipeline.bind(buf);
-    this->descriptorSet.bind(buf, this->pipeline);
+    this->descriptorSets.at(fc % 2).bind(buf, this->pipeline);
     buf.dispatch(threadsX, threadsY, 1);
 }
diff --git a/src/shaderchains/merge.cpp b/src/shaderchains/merge.cpp
index 63f74b7..82f09a7 100644
--- a/src/shaderchains/merge.cpp
+++ b/src/shaderchains/merge.cpp
@@ -20,7 +20,8 @@ Merge::Merge(const Device& device, const Core::DescriptorPool& pool,
           { 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE },
           { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER } });
     this->pipeline = Core::Pipeline(device, this->shaderModule);
-    this->descriptorSet = Core::DescriptorSet(device, pool, this->shaderModule);
+    for (size_t i = 0; i < 2; i++)
+        this->descriptorSets.at(i) = Core::DescriptorSet(device, pool, this->shaderModule);
     this->buffer = Core::Buffer(device, Globals::fgBuffer, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
 
     auto extent = this->inImg1.getExtent();
@@ -31,20 +32,22 @@ Merge::Merge(const Device& device, const Core::DescriptorPool& pool,
         VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
         VK_IMAGE_ASPECT_COLOR_BIT);
 
-    this->descriptorSet.update(device)
-        .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
-        .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampEdge)
-        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg1)
-        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg2)
-        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg3)
-        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg4)
-        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg5)
-        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImg)
-        .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, this->buffer)
-        .build();
+    for (size_t fc = 0; fc < 2; fc++) {
+        this->descriptorSets.at(fc).update(device)
+            .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
+            .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampEdge)
+            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, (fc % 2 == 0) ? this->inImg1 : this->inImg2)
+            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, (fc % 2 == 0) ? this->inImg2 : this->inImg1)
+            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg3)
+            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg4)
+            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg5)
+            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImg)
+            .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, this->buffer)
+            .build();
+    }
 }
 
-void Merge::Dispatch(const Core::CommandBuffer& buf) {
+void Merge::Dispatch(const Core::CommandBuffer& buf, uint64_t fc) {
     auto extent = this->inImg1.getExtent();
 
     // first pass
@@ -61,6 +64,6 @@ void Merge::Dispatch(const Core::CommandBuffer& buf) {
         .build();
 
     this->pipeline.bind(buf);
-    this->descriptorSet.bind(buf, this->pipeline);
+    this->descriptorSets.at(fc % 2).bind(buf, this->pipeline);
     buf.dispatch(threadsX, threadsY, 1);
 }