diff --git a/include/shaderchains/alpha.hpp b/include/shaderchains/alpha.hpp index 5f8dec0..2f18daf 100644 --- a/include/shaderchains/alpha.hpp +++ b/include/shaderchains/alpha.hpp @@ -38,13 +38,18 @@ namespace LSFG::Shaderchains { /// Dispatch the shaderchain. /// /// @param buf The command buffer to use for dispatching. + /// @param fc The frame count, used to determine which output images to write to. /// /// @throws std::logic_error if the command buffer is not recording. /// - void Dispatch(const Core::CommandBuffer& buf); + void Dispatch(const Core::CommandBuffer& buf, uint64_t fc); - /// Get the output images. - [[nodiscard]] const auto& getOutImages() const { return this->outImgs; } + /// Get the output images written to when fc % 3 == 0 + [[nodiscard]] const auto& getOutImages0() const { return this->outImgs_0; } + /// Get the output images written to when fc % 3 == 1 + [[nodiscard]] const auto& getOutImages1() const { return this->outImgs_1; } + /// Get the output images written to when fc % 3 == 2 + [[nodiscard]] const auto& getOutImages2() const { return this->outImgs_2; } /// Trivially copyable, moveable and destructible Alpha(const Alpha&) noexcept = default; @@ -55,15 +60,18 @@ namespace LSFG::Shaderchains { private: std::array shaderModules; std::array pipelines; - std::array descriptorSets; + std::array descriptorSets; // last shader is special + std::array specialDescriptorSets; Core::Image inImg; - std::array tempImgs1; // half-size - std::array tempImgs2; // half-size - std::array tempImgs3; // quarter-size + std::array tempImgs1; + std::array tempImgs2; + std::array tempImgs3; - std::array outImgs; // quarter-size + std::array outImgs_0; + std::array outImgs_1; + std::array outImgs_2; }; } diff --git a/include/shaderchains/beta.hpp b/include/shaderchains/beta.hpp index 3aa37e8..f0de6d0 100644 --- a/include/shaderchains/beta.hpp +++ b/include/shaderchains/beta.hpp @@ -29,23 +29,26 @@ namespace LSFG::Shaderchains { /// /// @param device The 
Vulkan device to create the resources on. /// @param pool The descriptor pool to allocate in. - /// @param temporalImgs The temporal images to use for processing. - /// @param inImgs The input images to process + /// @param inImgs_0 The input images written when fc % 3 == 0 (next when fc % 3 == 0) + /// @param inImgs_1 The input images written when fc % 3 == 1 (prev prev when fc % 3 == 0) + /// @param inImgs_2 The input images written when fc % 3 == 2 (prev when fc % 3 == 0) /// /// @throws LSFG::vulkan_error if resource creation fails. /// Beta(const Device& device, const Core::DescriptorPool& pool, - std::array temporalImgs, - std::array inImgs); + std::array inImgs_0, + std::array inImgs_1, + std::array inImgs_2); /// /// Dispatch the shaderchain. /// /// @param buf The command buffer to use for dispatching. + /// @param fc The frame count, used to select the input images. /// /// @throws std::logic_error if the command buffer is not recording. /// - void Dispatch(const Core::CommandBuffer& buf); + void Dispatch(const Core::CommandBuffer& buf, uint64_t fc); /// Get the output images. 
[[nodiscard]] const auto& getOutImages() const { return this->outImgs; } @@ -59,11 +62,13 @@ namespace LSFG::Shaderchains { private: std::array shaderModules; std::array pipelines; - std::array descriptorSets; + std::array descriptorSets; // first shader has special logic + std::array specialDescriptorSets; Core::Buffer buffer; - std::array temporalImgs; - std::array inImgs; + std::array inImgs_0; + std::array inImgs_1; + std::array inImgs_2; std::array tempImgs1; std::array tempImgs2; diff --git a/src/lsfg.cpp b/src/lsfg.cpp index 130d95c..5b5ec1b 100644 --- a/src/lsfg.cpp +++ b/src/lsfg.cpp @@ -11,13 +11,13 @@ Generator::Generator(const Context& context) { // create shader chains this->downsampleChain = Shaderchains::Downsample(context.device, context.descPool, this->inImg_0, this->inImg_1); - // for (size_t i = 0; i < 7; i++) - // this->alphaChains.at(i) = Shaderchains::Alpha(context.device, context.descPool, - // this->downsampleChain.getOutImages().at(i), i == 0); - // this->betaChain = Shaderchains::Beta(context.device, context.descPool, - // this->alphaChains.at(0).getOutImages0(), - // this->alphaChains.at(0).getOutImages1(), - // this->alphaChains.at(0).getOutImages2()); + for (size_t i = 0; i < 7; i++) + this->alphaChains.at(i) = Shaderchains::Alpha(context.device, context.descPool, + this->downsampleChain.getOutImages().at(i)); + this->betaChain = Shaderchains::Beta(context.device, context.descPool, + this->alphaChains.at(0).getOutImages0(), + this->alphaChains.at(0).getOutImages1(), + this->alphaChains.at(0).getOutImages2()); // for (size_t i = 0; i < 7; i++) { // if (i < 4) { // this->gammaChains.at(i) = Shaderchains::Gamma(context.device, context.descPool, @@ -79,9 +79,9 @@ void Generator::present(const Context& context) { cmdBuffer.begin(); this->downsampleChain.Dispatch(cmdBuffer, fc); - // for (size_t i = 0; i < 7; i++) - // this->alphaChains.at(6 - i).Dispatch(cmdBuffer, fc); - // this->betaChain.Dispatch(cmdBuffer, fc); + for (size_t i = 0; i < 
7; i++) + this->alphaChains.at(6 - i).Dispatch(cmdBuffer, fc); + this->betaChain.Dispatch(cmdBuffer, fc); // for (size_t i = 0; i < 4; i++) // this->gammaChains.at(i).Dispatch(cmdBuffer); // for (size_t i = 0; i < 3; i++) { diff --git a/src/shaderchains/alpha.cpp b/src/shaderchains/alpha.cpp index 41d9f9d..8f18f90 100644 --- a/src/shaderchains/alpha.cpp +++ b/src/shaderchains/alpha.cpp @@ -27,9 +27,13 @@ Alpha::Alpha(const Device& device, const Core::DescriptorPool& pool, for (size_t i = 0; i < 4; i++) { this->pipelines.at(i) = Core::Pipeline(device, this->shaderModules.at(i)); + if (i == 3) continue; // last shader is special this->descriptorSets.at(i) = Core::DescriptorSet(device, pool, this->shaderModules.at(i)); } + for (size_t i = 0; i < 3; i++) + this->specialDescriptorSets.at(i) = Core::DescriptorSet(device, pool, + this->shaderModules.at(3)); const auto extent = this->inImg.getExtent(); @@ -60,7 +64,17 @@ Alpha::Alpha(const Device& device, const Core::DescriptorPool& pool, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, VK_IMAGE_ASPECT_COLOR_BIT); - this->outImgs.at(i) = Core::Image(device, + this->outImgs_0.at(i) = Core::Image(device, + quarterExtent, + VK_FORMAT_R8G8B8A8_UNORM, + VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + VK_IMAGE_ASPECT_COLOR_BIT); + this->outImgs_1.at(i) = Core::Image(device, + quarterExtent, + VK_FORMAT_R8G8B8A8_UNORM, + VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + VK_IMAGE_ASPECT_COLOR_BIT); + this->outImgs_2.at(i) = Core::Image(device, quarterExtent, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, @@ -82,14 +96,19 @@ Alpha::Alpha(const Device& device, const Core::DescriptorPool& pool, .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2) .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs3) .build(); - this->descriptorSets.at(3).update(device) - .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder) - 
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs3) - .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs) - .build(); + for (size_t fc = 0; fc < 3; fc++) { + auto* outImgs = &this->outImgs_0; // pointer: assigning through a reference would overwrite outImgs_0 + if (fc == 1) outImgs = &this->outImgs_1; + else if (fc == 2) outImgs = &this->outImgs_2; + this->specialDescriptorSets.at(fc).update(device) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs3) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, *outImgs) + .build(); + } } -void Alpha::Dispatch(const Core::CommandBuffer& buf) { +void Alpha::Dispatch(const Core::CommandBuffer& buf, uint64_t fc) { const auto halfExtent = this->tempImgs1.at(0).getExtent(); const auto quarterExtent = this->tempImgs3.at(0).getExtent(); @@ -130,12 +149,15 @@ void Alpha::Dispatch(const Core::CommandBuffer& buf) { buf.dispatch(threadsX, threadsY, 1); // fourth pass + auto* outImgs = &this->outImgs_0; // select the fc % 3 output set without mutating any member + if ((fc % 3) == 1) outImgs = &this->outImgs_1; + else if ((fc % 3) == 2) outImgs = &this->outImgs_2; Utils::BarrierBuilder(buf) .addW2R(this->tempImgs3) - .addR2W(this->outImgs) + .addR2W(*outImgs) .build(); this->pipelines.at(3).bind(buf); - this->descriptorSets.at(3).bind(buf, this->pipelines.at(3)); + this->specialDescriptorSets.at(fc % 3).bind(buf, this->pipelines.at(3)); buf.dispatch(threadsX, threadsY, 1); } diff --git a/src/shaderchains/beta.cpp b/src/shaderchains/beta.cpp index bf96ae8..dbbfab6 100644 --- a/src/shaderchains/beta.cpp +++ b/src/shaderchains/beta.cpp @@ -4,10 +4,12 @@ using namespace LSFG::Shaderchains; Beta::Beta(const Device& device, const Core::DescriptorPool& pool, - std::array temporalImgs, - std::array inImgs) - : temporalImgs(std::move(temporalImgs)), - inImgs(std::move(inImgs)) { + std::array inImgs_0, + std::array inImgs_1, + std::array inImgs_2) + : inImgs_0(std::move(inImgs_0)), + 
inImgs_1(std::move(inImgs_1)), + inImgs_2(std::move(inImgs_2)) { this->shaderModules = {{ Core::ShaderModule(device, 
"rsc/shaders/beta/0.spv", { { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, @@ -34,12 +36,16 @@ Beta::Beta(const Device& device, const Core::DescriptorPool& pool, for (size_t i = 0; i < 5; i++) { this->pipelines.at(i) = Core::Pipeline(device, this->shaderModules.at(i)); - this->descriptorSets.at(i) = Core::DescriptorSet(device, pool, + if (i == 0) continue; // first shader has special logic + this->descriptorSets.at(i - 1) = Core::DescriptorSet(device, pool, // passes 2..5 live in slots 0..3 this->shaderModules.at(i)); } + for (size_t i = 0; i < 3; i++) + this->specialDescriptorSets.at(i) = Core::DescriptorSet(device, pool, + this->shaderModules.at(0)); // layout of the first shader, which these sets are bound with this->buffer = Core::Buffer(device, Globals::fgBuffer, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT); - const auto extent = this->temporalImgs.at(0).getExtent(); + const auto extent = this->inImgs_0.at(0).getExtent(); for (size_t i = 0; i < 2; i++) { this->tempImgs1.at(i) = Core::Image(device, @@ -62,28 +68,43 @@ Beta::Beta(const Device& device, const Core::DescriptorPool& pool, VK_IMAGE_ASPECT_COLOR_BIT); } + for (size_t fc = 0; fc < 3; fc++) { + auto* nextImgs = &this->inImgs_0; // pointers: references cannot be rebound, assignment would copy over the members + auto* prevImgs = &this->inImgs_2; + auto* pprevImgs = &this->inImgs_1; + if (fc == 1) { + nextImgs = &this->inImgs_1; + prevImgs = &this->inImgs_0; + pprevImgs = &this->inImgs_2; + } else if (fc == 2) { + nextImgs = &this->inImgs_2; + prevImgs = &this->inImgs_1; + pprevImgs = &this->inImgs_0; + } + this->specialDescriptorSets.at(fc).update(device) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, *pprevImgs) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, *prevImgs) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, *nextImgs) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1) + .build(); + } this->descriptorSets.at(0).update(device) - .add(VK_DESCRIPTOR_TYPE_SAMPLER, 
Globals::samplerClampBorder) - .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->temporalImgs) - .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs) - .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 
this->tempImgs1) - .build(); - this->descriptorSets.at(1).update(device) .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder) .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1) .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2) .build(); - this->descriptorSets.at(2).update(device) + this->descriptorSets.at(1).update(device) .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder) .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2) .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1) .build(); - this->descriptorSets.at(3).update(device) + this->descriptorSets.at(2).update(device) .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder) .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1) .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2) .build(); - this->descriptorSets.at(4).update(device) + this->descriptorSets.at(3).update(device) .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder) .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2) .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs) @@ -91,7 +112,7 @@ Beta::Beta(const Device& device, const Core::DescriptorPool& pool, .build(); } -void Beta::Dispatch(const Core::CommandBuffer& buf) { +void Beta::Dispatch(const Core::CommandBuffer& buf, uint64_t fc) { const auto extent = this->tempImgs1.at(0).getExtent(); // first pass @@ -99,13 +120,14 @@ void Beta::Dispatch(const Core::CommandBuffer& buf) { uint32_t threadsY = (extent.height + 7) >> 3; Utils::BarrierBuilder(buf) - .addW2R(this->temporalImgs) - .addW2R(this->inImgs) + .addW2R(this->inImgs_0) + .addW2R(this->inImgs_1) + .addW2R(this->inImgs_2) .addR2W(this->tempImgs1) .build(); this->pipelines.at(0).bind(buf); - this->descriptorSets.at(0).bind(buf, this->pipelines.at(0)); + this->specialDescriptorSets.at(fc % 3).bind(buf, this->pipelines.at(0)); buf.dispatch(threadsX, threadsY, 1); // second pass @@ -115,7 +137,7 @@ void Beta::Dispatch(const Core::CommandBuffer& buf) { .build(); 
this->pipelines.at(1).bind(buf); - this->descriptorSets.at(1).bind(buf, this->pipelines.at(1)); + this->descriptorSets.at(0).bind(buf, this->pipelines.at(1)); buf.dispatch(threadsX, threadsY, 1); // third pass @@ -125,7 +147,7 @@ void Beta::Dispatch(const Core::CommandBuffer& buf) { .build(); this->pipelines.at(2).bind(buf); - this->descriptorSets.at(2).bind(buf, this->pipelines.at(2)); + this->descriptorSets.at(1).bind(buf, this->pipelines.at(2)); buf.dispatch(threadsX, threadsY, 1); // fourth pass @@ -135,7 +157,7 @@ void Beta::Dispatch(const Core::CommandBuffer& buf) { .build(); this->pipelines.at(3).bind(buf); - this->descriptorSets.at(3).bind(buf, this->pipelines.at(3)); + this->descriptorSets.at(2).bind(buf, this->pipelines.at(3)); buf.dispatch(threadsX, threadsY, 1); // fifth pass @@ -148,6 +170,6 @@ void Beta::Dispatch(const Core::CommandBuffer& buf) { .build(); this->pipelines.at(4).bind(buf); - this->descriptorSets.at(4).bind(buf, this->pipelines.at(4)); + this->descriptorSets.at(3).bind(buf, this->pipelines.at(4)); buf.dispatch(threadsX, threadsY, 1); }