temporal logic for alpha & beta

This commit is contained in:
PancakeTAS 2025-07-01 03:58:24 +02:00
parent 1a9925129b
commit bf3c30575e
No known key found for this signature in database
5 changed files with 115 additions and 58 deletions

View file

@ -38,13 +38,18 @@ namespace LSFG::Shaderchains {
/// Dispatch the shaderchain.
///
/// @param buf The command buffer to use for dispatching.
/// @param fc The frame count, used to determine which output images to write to.
///
/// @throws std::logic_error if the command buffer is not recording.
///
void Dispatch(const Core::CommandBuffer& buf);
void Dispatch(const Core::CommandBuffer& buf, uint64_t fc);
/// Get the output images.
[[nodiscard]] const auto& getOutImages() const { return this->outImgs; }
/// Get the output images written to when fc % 3 == 0
[[nodiscard]] const auto& getOutImages0() const { return this->outImgs_0; }
/// Get the output images written to when fc % 3 == 1
[[nodiscard]] const auto& getOutImages1() const { return this->outImgs_1; }
/// Get the output images written to when fc % 3 == 2
[[nodiscard]] const auto& getOutImages2() const { return this->outImgs_2; }
/// Trivially copyable, moveable and destructible
Alpha(const Alpha&) noexcept = default;
@ -55,15 +60,18 @@ namespace LSFG::Shaderchains {
private:
std::array<Core::ShaderModule, 4> shaderModules;
std::array<Core::Pipeline, 4> pipelines;
std::array<Core::DescriptorSet, 4> descriptorSets;
std::array<Core::DescriptorSet, 3> descriptorSets; // last shader is special
std::array<Core::DescriptorSet, 3> specialDescriptorSets;
Core::Image inImg;
std::array<Core::Image, 2> tempImgs1; // half-size
std::array<Core::Image, 2> tempImgs2; // half-size
std::array<Core::Image, 4> tempImgs3; // quarter-size
std::array<Core::Image, 2> tempImgs1;
std::array<Core::Image, 2> tempImgs2;
std::array<Core::Image, 4> tempImgs3;
std::array<Core::Image, 4> outImgs; // quarter-size
std::array<Core::Image, 4> outImgs_0;
std::array<Core::Image, 4> outImgs_1;
std::array<Core::Image, 4> outImgs_2;
};
}

View file

@ -29,23 +29,26 @@ namespace LSFG::Shaderchains {
///
/// @param device The Vulkan device to create the resources on.
/// @param pool The descriptor pool to allocate in.
/// @param temporalImgs The temporal images to use for processing.
/// @param inImgs The input images to process
/// @param inImgs_0 The next input images to process (when fc % 3 == 0)
/// @param inImgs_1 The prev input images to process (when fc % 3 == 0)
/// @param inImgs_2 The prev prev input images to process (when fc % 3 == 0)
///
/// @throws LSFG::vulkan_error if resource creation fails.
///
Beta(const Device& device, const Core::DescriptorPool& pool,
std::array<Core::Image, 8> temporalImgs,
std::array<Core::Image, 4> inImgs);
std::array<Core::Image, 4> inImgs_0,
std::array<Core::Image, 4> inImgs_1,
std::array<Core::Image, 4> inImgs_2);
///
/// Dispatch the shaderchain.
///
/// @param buf The command buffer to use for dispatching.
/// @param fc The frame count, used to select the input images.
///
/// @throws std::logic_error if the command buffer is not recording.
///
void Dispatch(const Core::CommandBuffer& buf);
void Dispatch(const Core::CommandBuffer& buf, uint64_t fc);
/// Get the output images.
[[nodiscard]] const auto& getOutImages() const { return this->outImgs; }
@ -59,11 +62,13 @@ namespace LSFG::Shaderchains {
private:
std::array<Core::ShaderModule, 5> shaderModules;
std::array<Core::Pipeline, 5> pipelines;
std::array<Core::DescriptorSet, 5> descriptorSets;
std::array<Core::DescriptorSet, 4> descriptorSets; // first shader has special logic
std::array<Core::DescriptorSet, 3> specialDescriptorSets;
Core::Buffer buffer;
std::array<Core::Image, 8> temporalImgs;
std::array<Core::Image, 4> inImgs;
std::array<Core::Image, 4> inImgs_0;
std::array<Core::Image, 4> inImgs_1;
std::array<Core::Image, 4> inImgs_2;
std::array<Core::Image, 2> tempImgs1;
std::array<Core::Image, 2> tempImgs2;

View file

@ -11,13 +11,13 @@ Generator::Generator(const Context& context) {
// create shader chains
this->downsampleChain = Shaderchains::Downsample(context.device, context.descPool,
this->inImg_0, this->inImg_1);
// for (size_t i = 0; i < 7; i++)
// this->alphaChains.at(i) = Shaderchains::Alpha(context.device, context.descPool,
// this->downsampleChain.getOutImages().at(i), i == 0);
// this->betaChain = Shaderchains::Beta(context.device, context.descPool,
// this->alphaChains.at(0).getOutImages0(),
// this->alphaChains.at(0).getOutImages1(),
// this->alphaChains.at(0).getOutImages2());
for (size_t i = 0; i < 7; i++)
this->alphaChains.at(i) = Shaderchains::Alpha(context.device, context.descPool,
this->downsampleChain.getOutImages().at(i));
this->betaChain = Shaderchains::Beta(context.device, context.descPool,
this->alphaChains.at(0).getOutImages0(),
this->alphaChains.at(0).getOutImages2(),
this->alphaChains.at(0).getOutImages1());
// for (size_t i = 0; i < 7; i++) {
// if (i < 4) {
// this->gammaChains.at(i) = Shaderchains::Gamma(context.device, context.descPool,
@ -79,9 +79,9 @@ void Generator::present(const Context& context) {
cmdBuffer.begin();
this->downsampleChain.Dispatch(cmdBuffer, fc);
// for (size_t i = 0; i < 7; i++)
// this->alphaChains.at(6 - i).Dispatch(cmdBuffer, fc);
// this->betaChain.Dispatch(cmdBuffer, fc);
for (size_t i = 0; i < 7; i++)
this->alphaChains.at(6 - i).Dispatch(cmdBuffer, fc);
this->betaChain.Dispatch(cmdBuffer, fc);
// for (size_t i = 0; i < 4; i++)
// this->gammaChains.at(i).Dispatch(cmdBuffer);
// for (size_t i = 0; i < 3; i++) {

View file

@ -27,9 +27,13 @@ Alpha::Alpha(const Device& device, const Core::DescriptorPool& pool,
for (size_t i = 0; i < 4; i++) {
this->pipelines.at(i) = Core::Pipeline(device,
this->shaderModules.at(i));
if (i == 3) continue; // last shader is special
this->descriptorSets.at(i) = Core::DescriptorSet(device, pool,
this->shaderModules.at(i));
}
for (size_t i = 0; i < 3; i++)
this->specialDescriptorSets.at(i) = Core::DescriptorSet(device, pool,
this->shaderModules.at(3));
const auto extent = this->inImg.getExtent();
@ -60,7 +64,17 @@ Alpha::Alpha(const Device& device, const Core::DescriptorPool& pool,
VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
VK_IMAGE_ASPECT_COLOR_BIT);
this->outImgs.at(i) = Core::Image(device,
this->outImgs_0.at(i) = Core::Image(device,
quarterExtent,
VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
VK_IMAGE_ASPECT_COLOR_BIT);
this->outImgs_1.at(i) = Core::Image(device,
quarterExtent,
VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
VK_IMAGE_ASPECT_COLOR_BIT);
this->outImgs_2.at(i) = Core::Image(device,
quarterExtent,
VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
@ -82,14 +96,19 @@ Alpha::Alpha(const Device& device, const Core::DescriptorPool& pool,
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs3)
.build();
this->descriptorSets.at(3).update(device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs3)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs)
.build();
// Fill in one descriptor set per temporal slot for the fourth shader. Each
// set reads tempImgs3 and writes to a different output array (outImgs_0/1/2).
for (size_t fc = 0; fc < 3; fc++) {
    // Select the target through a pointer. A C++ reference cannot be
    // reseated: assigning to it would copy the other array over outImgs_0
    // and leave every descriptor set writing to outImgs_0.
    auto* outImgs = &this->outImgs_0;
    if (fc == 1) outImgs = &this->outImgs_1;
    else if (fc == 2) outImgs = &this->outImgs_2;
    this->specialDescriptorSets.at(fc).update(device)
        .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs3)
        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, *outImgs)
        .build();
}
}
void Alpha::Dispatch(const Core::CommandBuffer& buf) {
void Alpha::Dispatch(const Core::CommandBuffer& buf, uint64_t fc) {
const auto halfExtent = this->tempImgs1.at(0).getExtent();
const auto quarterExtent = this->tempImgs3.at(0).getExtent();
@ -130,12 +149,15 @@ void Alpha::Dispatch(const Core::CommandBuffer& buf) {
buf.dispatch(threadsX, threadsY, 1);
// fourth pass
auto& outImgs = this->outImgs_0;
if ((fc % 3) == 1) outImgs = this->outImgs_1;
else if ((fc % 3) == 2) outImgs = this->outImgs_2;
Utils::BarrierBuilder(buf)
.addW2R(this->tempImgs3)
.addR2W(this->outImgs)
.addR2W(outImgs)
.build();
this->pipelines.at(3).bind(buf);
this->descriptorSets.at(3).bind(buf, this->pipelines.at(3));
this->specialDescriptorSets.at(fc % 3).bind(buf, this->pipelines.at(3));
buf.dispatch(threadsX, threadsY, 1);
}

View file

@ -4,10 +4,12 @@
using namespace LSFG::Shaderchains;
Beta::Beta(const Device& device, const Core::DescriptorPool& pool,
std::array<Core::Image, 8> temporalImgs,
std::array<Core::Image, 4> inImgs)
: temporalImgs(std::move(temporalImgs)),
inImgs(std::move(inImgs)) {
std::array<Core::Image, 4> inImgs_0,
std::array<Core::Image, 4> inImgs_1,
std::array<Core::Image, 4> inImgs_2)
: inImgs_0(std::move(inImgs_0)),
inImgs_1(std::move(inImgs_1)),
inImgs_2(std::move(inImgs_2)) {
this->shaderModules = {{
Core::ShaderModule(device, "rsc/shaders/beta/0.spv",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
@ -34,12 +36,16 @@ Beta::Beta(const Device& device, const Core::DescriptorPool& pool,
for (size_t i = 0; i < 5; i++) {
this->pipelines.at(i) = Core::Pipeline(device,
this->shaderModules.at(i));
this->descriptorSets.at(i) = Core::DescriptorSet(device, pool,
if (i == 0) continue; // first shader has special logic
// Shaders 1..4 use the regular descriptor sets 0..3 (Dispatch binds set k
// together with pipeline k+1), so the set index is i - 1. Using i + 1 would
// leave sets 0 and 1 uninitialized and make at(5) throw std::out_of_range.
this->descriptorSets.at(i - 1) = Core::DescriptorSet(device, pool,
    this->shaderModules.at(i));
}
// The special descriptor sets belong to the FIRST shader (they are bound with
// pipelines.at(0) in Dispatch), so they must be created from shader module 0.
for (size_t i = 0; i < 3; i++)
    this->specialDescriptorSets.at(i) = Core::DescriptorSet(device, pool,
        this->shaderModules.at(0));
this->buffer = Core::Buffer(device, Globals::fgBuffer, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
const auto extent = this->temporalImgs.at(0).getExtent();
const auto extent = this->inImgs_0.at(0).getExtent();
for (size_t i = 0; i < 2; i++) {
this->tempImgs1.at(i) = Core::Image(device,
@ -62,28 +68,43 @@ Beta::Beta(const Device& device, const Core::DescriptorPool& pool,
VK_IMAGE_ASPECT_COLOR_BIT);
}
// Fill in one descriptor set per temporal slot for the first shader. Each set
// sees the three input arrays in a rotated role order (pprev, prev, next).
// NOTE(review): the Generator passes Alpha's outputs in the order 0, 2, 1;
// confirm that this slot-to-role mapping matches that argument order.
for (size_t fc = 0; fc < 3; fc++) {
    // Select through pointers. References cannot be reseated, so assigning
    // to them would overwrite the member arrays instead of picking a
    // different array for each role.
    auto* nextImgs = &this->inImgs_0;
    auto* prevImgs = &this->inImgs_2;
    auto* pprevImgs = &this->inImgs_1;
    if (fc == 1) {
        nextImgs = &this->inImgs_1;
        prevImgs = &this->inImgs_0;
        pprevImgs = &this->inImgs_2;
    } else if (fc == 2) {
        nextImgs = &this->inImgs_2;
        prevImgs = &this->inImgs_1;
        pprevImgs = &this->inImgs_0;
    }
    this->specialDescriptorSets.at(fc).update(device)
        .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, *pprevImgs)
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, *prevImgs)
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, *nextImgs)
        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
        .build();
}
this->descriptorSets.at(0).update(device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->temporalImgs)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
.build();
this->descriptorSets.at(1).update(device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
.build();
this->descriptorSets.at(2).update(device)
this->descriptorSets.at(1).update(device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
.build();
this->descriptorSets.at(3).update(device)
this->descriptorSets.at(2).update(device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
.build();
this->descriptorSets.at(4).update(device)
this->descriptorSets.at(3).update(device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs)
@ -91,7 +112,7 @@ Beta::Beta(const Device& device, const Core::DescriptorPool& pool,
.build();
}
void Beta::Dispatch(const Core::CommandBuffer& buf) {
void Beta::Dispatch(const Core::CommandBuffer& buf, uint64_t fc) {
const auto extent = this->tempImgs1.at(0).getExtent();
// first pass
@ -99,13 +120,14 @@ void Beta::Dispatch(const Core::CommandBuffer& buf) {
uint32_t threadsY = (extent.height + 7) >> 3;
Utils::BarrierBuilder(buf)
.addW2R(this->temporalImgs)
.addW2R(this->inImgs)
.addW2R(this->inImgs_0)
.addW2R(this->inImgs_1)
.addW2R(this->inImgs_2)
.addR2W(this->tempImgs1)
.build();
this->pipelines.at(0).bind(buf);
this->descriptorSets.at(0).bind(buf, this->pipelines.at(0));
this->specialDescriptorSets.at(fc % 3).bind(buf, this->pipelines.at(0));
buf.dispatch(threadsX, threadsY, 1);
// second pass
@ -115,7 +137,7 @@ void Beta::Dispatch(const Core::CommandBuffer& buf) {
.build();
this->pipelines.at(1).bind(buf);
this->descriptorSets.at(1).bind(buf, this->pipelines.at(1));
this->descriptorSets.at(0).bind(buf, this->pipelines.at(1));
buf.dispatch(threadsX, threadsY, 1);
// third pass
@ -125,7 +147,7 @@ void Beta::Dispatch(const Core::CommandBuffer& buf) {
.build();
this->pipelines.at(2).bind(buf);
this->descriptorSets.at(2).bind(buf, this->pipelines.at(2));
this->descriptorSets.at(1).bind(buf, this->pipelines.at(2));
buf.dispatch(threadsX, threadsY, 1);
// fourth pass
@ -135,7 +157,7 @@ void Beta::Dispatch(const Core::CommandBuffer& buf) {
.build();
this->pipelines.at(3).bind(buf);
this->descriptorSets.at(3).bind(buf, this->pipelines.at(3));
this->descriptorSets.at(2).bind(buf, this->pipelines.at(3));
buf.dispatch(threadsX, threadsY, 1);
// fifth pass
@ -148,6 +170,6 @@ void Beta::Dispatch(const Core::CommandBuffer& buf) {
.build();
this->pipelines.at(4).bind(buf);
this->descriptorSets.at(4).bind(buf, this->pipelines.at(4));
this->descriptorSets.at(3).bind(buf, this->pipelines.at(4));
buf.dispatch(threadsX, threadsY, 1);
}