implement lsfg 3.0

This commit is contained in:
PancakeTAS 2025-07-10 01:50:36 +02:00
parent 45f4296319
commit 778a87dd24
No known key found for this signature in database
83 changed files with 2039 additions and 3032 deletions

View file

@ -1,37 +0,0 @@
# Build script for the lsfg-vk-gen static library.
project(lsfg-vk-gen LANGUAGES CXX)

# Collect the sources of every backend sub-directory.
file(GLOB BACKEND_SOURCES
    "src/core/*.cpp"
    "src/pool/*.cpp"
    "src/shaderchains/*.cpp"
    "src/utils/*.cpp"
    "src/*.cpp"
)

add_library(lsfg-vk-gen STATIC ${BACKEND_SOURCES})

# "include" stays private to this target, "public" is propagated to consumers.
target_include_directories(lsfg-vk-gen
    PRIVATE include
    PUBLIC public)

target_link_libraries(lsfg-vk-gen
    PUBLIC vulkan peparse dxvk)

# Enable every warning, then switch off the groups that are not wanted.
target_compile_options(lsfg-vk-gen PRIVATE
    -Weverything -fPIC
    # disable compat c++ flags
    -Wno-pre-c++20-compat-pedantic
    -Wno-pre-c++17-compat
    -Wno-c++98-compat-pedantic
    -Wno-c++98-compat
    # disable other flags
    -Wno-missing-designated-field-initializers
    -Wno-shadow # allow shadowing
    -Wno-switch-enum # ignore missing cases
    -Wno-switch-default # ignore missing default
    -Wno-padded # ignore automatic padding
    -Wno-exit-time-destructors # allow globals
    -Wno-global-constructors
    # required for vulkan
    -Wno-cast-function-type-strict
    # required for peparse
    -Wno-unused-template
)

View file

@ -1,95 +0,0 @@
#ifndef CONTEXT_HPP
#define CONTEXT_HPP
#include "pool/shaderpool.hpp"
#include "core/commandbuffer.hpp"
#include "core/commandpool.hpp"
#include "core/descriptorpool.hpp"
#include "core/fence.hpp"
#include "core/image.hpp"
#include "core/semaphore.hpp"
#include "shaderchains/alpha.hpp"
#include "shaderchains/beta.hpp"
#include "shaderchains/delta.hpp"
#include "shaderchains/downsample.hpp"
#include "shaderchains/epsilon.hpp"
#include "shaderchains/extract.hpp"
#include "shaderchains/gamma.hpp"
#include "shaderchains/magic.hpp"
#include "shaderchains/merge.hpp"
#include "shaderchains/zeta.hpp"
namespace LSFG {

    ///
    /// Frame generation context.
    ///
    /// Bundles the two input images, the per-render synchronisation data
    /// and one instance of every shader chain.
    ///
    class Context {
    public:
        ///
        /// Build a generator instance.
        ///
        /// @param device     Vulkan device to use.
        /// @param shaderpool Shader pool to use.
        /// @param width      Width of the input images.
        /// @param height     Height of the input images.
        /// @param in0        File descriptor for the first input image.
        /// @param in1        File descriptor for the second input image.
        /// @param outN       File descriptors for the output images.
        ///
        /// @throws LSFG::vulkan_error if the generator fails to initialize.
        ///
        Context(const Core::Device& device, Pool::ShaderPool& shaderpool,
                uint32_t width, uint32_t height, int in0, int in1,
                const std::vector<int>& outN);

        ///
        /// Schedule the next generation.
        ///
        /// @param device Vulkan device to use.
        /// @param inSem  Semaphore to wait on before the generation starts.
        /// @param outSem Semaphores to signal, one after each finished generation.
        ///
        /// @throws LSFG::vulkan_error if the generator fails to present.
        ///
        void present(const Core::Device& device, int inSem,
                     const std::vector<int>& outSem);

        // Copy, move and destruction are all compiler-generated.
        Context(const Context&) = default;
        Context& operator=(const Context&) = default;
        Context(Context&&) = default;
        Context& operator=(Context&&) = default;
        ~Context() = default;

    private:
        Core::DescriptorPool descPool;
        Core::CommandPool cmdPool;
        // inImg_0 is next (inImg_1 prev) when fc % 2 == 0
        Core::Image inImg_0;
        Core::Image inImg_1;
        uint64_t frameIdx{0};

        /// Data for a single render.
        struct RenderInfo {
            Core::Semaphore inSemaphore; // wait for copy
            Core::CommandBuffer cmdBuffer1;
            std::vector<Core::Semaphore> internalSemaphores; // first step output
            std::vector<Core::CommandBuffer> cmdBuffers2; // second step output
            std::vector<Core::Semaphore> outSemaphores; // signal when done with each pass
            std::optional<std::vector<Core::Fence>> completionFences;
        };
        std::array<RenderInfo, 8> renderInfos; // 8 passes, why not

        // One member per shader chain; the array sizes are the number of
        // instances kept of each chain.
        Shaderchains::Downsample downsampleChain;
        std::array<Shaderchains::Alpha, 7> alphaChains;
        Shaderchains::Beta betaChain;
        std::array<Shaderchains::Gamma, 4> gammaChains;
        std::array<Shaderchains::Magic, 3> magicChains;
        std::array<Shaderchains::Delta, 3> deltaChains;
        std::array<Shaderchains::Epsilon, 3> epsilonChains;
        std::array<Shaderchains::Zeta, 3> zetaChains;
        std::array<Shaderchains::Extract, 2> extractChains;
        Shaderchains::Merge mergeChain;
    };

}
#endif // CONTEXT_HPP

View file

@ -1,63 +0,0 @@
#ifndef RESOURCES_HPP
#define RESOURCES_HPP
#include <dxbc/dxbc_options.h>
#include <d3d11.h>
#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>
namespace LSFG::Pool {

    ///
    /// Extracts resources from a DLL file.
    ///
    class Extractor {
    public:
        /// Construct an extractor that holds no resources.
        Extractor() noexcept = default;

        ///
        /// Construct an extractor from a DLL.
        ///
        /// @param path Path to the DLL file.
        ///
        /// @throws std::runtime_error if the file cannot be parsed.
        ///
        Extractor(const std::string& path);

        ///
        /// Look up a resource through its hash.
        ///
        /// @param hash Hash of the resource.
        /// @return Resource data
        ///
        /// @throws std::runtime_error if the resource is not found.
        ///
        [[nodiscard]] std::vector<uint8_t> getResource(uint32_t hash) const;

        // Move-only type: copying is deleted, moving and destruction are defaulted.
        Extractor(Extractor&&) = default;
        Extractor& operator=(Extractor&&) = default;
        Extractor(const Extractor&) = delete;
        Extractor& operator=(const Extractor&) = delete;
        ~Extractor() = default;

    private:
        // Resource bytes, presumably keyed by the hash passed to getResource() - verify in the .cpp
        std::unordered_map<uint32_t, std::vector<uint8_t>> resources;
    };

    ///
    /// Translate DXBC bytecode into SPIR-V.
    ///
    /// @param dxbc Bytecode to translate.
    /// @return Translated SPIR-V bytecode.
    ///
    /// @throws std::runtime_error if the translation fails.
    ///
    [[nodiscard]] std::vector<uint8_t> dxbcToSpirv(const std::vector<uint8_t>& dxbc);

}
#endif // RESOURCES_HPP

View file

@ -1,82 +0,0 @@
#ifndef ALPHA_HPP
#define ALPHA_HPP
#include "pool/shaderpool.hpp"
#include "core/commandbuffer.hpp"
#include "core/descriptorpool.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/shadermodule.hpp"
#include "core/device.hpp"
#include <array>
namespace LSFG::Shaderchains {

    ///
    /// Shader chain alpha.
    ///
    /// Takes an 8-bit R image and creates four quarter-sized 8-bit RGBA images.
    ///
    class Alpha {
    public:
        Alpha() = default;

        ///
        /// Set up the shaderchain.
        ///
        /// @param device     Vulkan device the resources are created on.
        /// @param shaderpool Shader pool providing the shader modules.
        /// @param pool       Descriptor pool to allocate in.
        /// @param inImg      Input image to process.
        ///
        /// @throws LSFG::vulkan_error if resource creation fails.
        ///
        Alpha(const Core::Device& device, Pool::ShaderPool& shaderpool,
              const Core::DescriptorPool& pool,
              Core::Image inImg);

        ///
        /// Record the shaderchain into a command buffer.
        ///
        /// @param buf Command buffer used for dispatching.
        /// @param fc  Frame count; decides which output images are written.
        ///
        /// @throws std::logic_error if the command buffer is not recording.
        ///
        void Dispatch(const Core::CommandBuffer& buf, uint64_t fc);

        /// Output images written to when fc % 3 == 0
        [[nodiscard]] const auto& getOutImages0() const { return outImgs_0; }
        /// Output images written to when fc % 3 == 1
        [[nodiscard]] const auto& getOutImages1() const { return outImgs_1; }
        /// Output images written to when fc % 3 == 2
        [[nodiscard]] const auto& getOutImages2() const { return outImgs_2; }

        /// Copy, move and destruction are compiler-generated.
        Alpha(const Alpha&) noexcept = default;
        Alpha(Alpha&&) noexcept = default;
        Alpha& operator=(const Alpha&) noexcept = default;
        Alpha& operator=(Alpha&&) noexcept = default;
        ~Alpha() = default;

    private:
        std::array<Core::ShaderModule, 4> shaderModules;
        std::array<Core::Pipeline, 4> pipelines;
        std::array<Core::DescriptorSet, 3> descriptorSets; // last shader is special
        std::array<Core::DescriptorSet, 3> specialDescriptorSets;

        Core::Image inImg;

        std::array<Core::Image, 2> tempImgs1;
        std::array<Core::Image, 2> tempImgs2;
        std::array<Core::Image, 4> tempImgs3;

        // one output set per value of fc % 3
        std::array<Core::Image, 4> outImgs_0;
        std::array<Core::Image, 4> outImgs_1;
        std::array<Core::Image, 4> outImgs_2;
    };

}
#endif // ALPHA_HPP

View file

@ -1,88 +0,0 @@
#ifndef BETA_HPP
#define BETA_HPP
#include "pool/shaderpool.hpp"
#include "core/buffer.hpp"
#include "core/commandbuffer.hpp"
#include "core/descriptorpool.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/shadermodule.hpp"
#include "core/device.hpp"
#include <array>
namespace LSFG::Shaderchains {
///
/// Shader chain beta.
///
/// Takes eight temporal 8-bit RGBA images, as well as the four output images from alpha,
/// and creates six 8-bit R images, halving in resolution each step.
///
class Beta {
public:
Beta() = default;
///
/// Initialize the shaderchain.
///
/// @param device The Vulkan device to create the resources on.
/// @param shaderpool The shader pool to use for shader modules.
/// @param pool The descriptor pool to allocate in.
/// @param inImgs_0 The next input images to process (when fc % 3 == 0)
/// @param inImgs_1 The prev input images to process (when fc % 3 == 0)
/// @param inImgs_2 The prev prev input images to process (when fc % 3 == 0)
/// @param genc Amount of frames to generate.
///
/// @throws LSFG::vulkan_error if resource creation fails.
///
Beta(const Core::Device& device, Pool::ShaderPool& shaderpool,
const Core::DescriptorPool& pool,
std::array<Core::Image, 4> inImgs_0,
std::array<Core::Image, 4> inImgs_1,
std::array<Core::Image, 4> inImgs_2,
size_t genc);
///
/// Dispatch the shaderchain.
///
/// @param buf The command buffer to use for dispatching.
/// @param fc The frame count, used to select the input images.
/// @param pass The pass number
///
/// @throws std::logic_error if the command buffer is not recording.
///
void Dispatch(const Core::CommandBuffer& buf, uint64_t fc, uint64_t pass);
/// Get the output images.
[[nodiscard]] const auto& getOutImages() const { return this->outImgs; }
/// Trivially copyable, moveable and destructible
Beta(const Beta&) noexcept = default;
Beta& operator=(const Beta&) noexcept = default;
Beta(Beta&&) noexcept = default;
Beta& operator=(Beta&&) noexcept = default;
~Beta() = default;
private:
std::array<Core::ShaderModule, 5> shaderModules;
std::array<Core::Pipeline, 5> pipelines;
std::array<Core::DescriptorSet, 3> descriptorSets; // first shader has special logic
std::array<Core::DescriptorSet, 3> specialDescriptorSets;
std::vector<Core::DescriptorSet> nDescriptorSets;
std::vector<Core::Buffer> buffers;
std::array<Core::Image, 4> inImgs_0;
std::array<Core::Image, 4> inImgs_1;
std::array<Core::Image, 4> inImgs_2;
std::array<Core::Image, 2> tempImgs1;
std::array<Core::Image, 2> tempImgs2;
std::array<Core::Image, 6> outImgs;
};
}
#endif // BETA_HPP

View file

@ -1,83 +0,0 @@
#ifndef DELTA_HPP
#define DELTA_HPP
#include "pool/shaderpool.hpp"
#include "core/buffer.hpp"
#include "core/commandbuffer.hpp"
#include "core/descriptorpool.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/shadermodule.hpp"
#include "core/device.hpp"
#include <array>
namespace LSFG::Shaderchains {
///
/// Shader chain delta.
///
/// Takes two 8-bit RGBA images and an optional third 16-bit half-res RGBA image,
/// producing a full-res 16-bit RGBA image.
///
class Delta {
public:
Delta() = default;
///
/// Initialize the shaderchain.
///
/// @param device The Vulkan device to create the resources on.
/// @param shaderpool The shader pool to use for shader modules.
/// @param pool The descriptor pool to allocate in.
/// @param inImgs The input images to process.
/// @param optImg An optional additional input from the previous pass.
/// @param genc Amount of frames to generate.
///
/// @throws LSFG::vulkan_error if resource creation fails.
///
Delta(const Core::Device& device, Pool::ShaderPool& shaderpool,
const Core::DescriptorPool& pool,
std::array<Core::Image, 2> inImgs,
std::optional<Core::Image> optImg,
size_t genc);
///
/// Dispatch the shaderchain.
///
/// @param buf The command buffer to use for dispatching.
/// @param pass The pass number.
///
/// @throws std::logic_error if the command buffer is not recording.
///
void Dispatch(const Core::CommandBuffer& buf, uint64_t pass);
/// Get the output image.
[[nodiscard]] const auto& getOutImage() const { return this->outImg; }
/// Trivially copyable, moveable and destructible
Delta(const Delta&) noexcept = default;
Delta& operator=(const Delta&) noexcept = default;
Delta(Delta&&) noexcept = default;
Delta& operator=(Delta&&) noexcept = default;
~Delta() = default;
private:
std::array<Core::ShaderModule, 4> shaderModules;
std::array<Core::Pipeline, 4> pipelines;
std::array<Core::DescriptorSet, 3> descriptorSets;
std::vector<Core::DescriptorSet> nDescriptorSets;
std::vector<Core::Buffer> buffers;
std::array<Core::Image, 2> inImgs;
std::optional<Core::Image> optImg;
std::array<Core::Image, 2> tempImgs1;
std::array<Core::Image, 2> tempImgs2;
Core::Image outImg;
};
}
#endif // DELTA_HPP

View file

@ -1,76 +0,0 @@
#ifndef DOWNSAMPLE_HPP
#define DOWNSAMPLE_HPP
#include "pool/shaderpool.hpp"
#include "core/buffer.hpp"
#include "core/commandbuffer.hpp"
#include "core/descriptorpool.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/shadermodule.hpp"
#include "core/device.hpp"
#include <array>
namespace LSFG::Shaderchains {

    ///
    /// Downsample shader.
    ///
    /// Takes an 8-bit RGBA image and downsamples it into 7x 8-bit R images.
    ///
    class Downsample {
    public:
        Downsample() = default;

        ///
        /// Set up the shaderchain.
        ///
        /// @param device     Vulkan device the resources are created on.
        /// @param shaderpool Shader pool providing the shader modules.
        /// @param pool       Descriptor pool to allocate in.
        /// @param inImg_0    The next full image to downsample (when fc % 2 == 0)
        /// @param inImg_1    The next full image to downsample (when fc % 2 == 1)
        /// @param genc       Amount of frames to generate.
        ///
        /// @throws LSFG::vulkan_error if resource creation fails.
        ///
        Downsample(const Core::Device& device, Pool::ShaderPool& shaderpool,
                   const Core::DescriptorPool& pool,
                   Core::Image inImg_0, Core::Image inImg_1,
                   size_t genc);

        ///
        /// Record the shaderchain into a command buffer.
        ///
        /// @param buf Command buffer used for dispatching.
        /// @param fc  Frame count, used to select the input image.
        ///
        /// @throws std::logic_error if the command buffer is not recording.
        ///
        void Dispatch(const Core::CommandBuffer& buf, uint64_t fc);

        /// Access the output images.
        [[nodiscard]] const auto& getOutImages() const { return outImgs; }

        /// Copy, move and destruction are compiler-generated.
        Downsample(const Downsample&) noexcept = default;
        Downsample(Downsample&&) noexcept = default;
        Downsample& operator=(const Downsample&) noexcept = default;
        Downsample& operator=(Downsample&&) noexcept = default;
        ~Downsample() = default;

    private:
        Core::ShaderModule shaderModule;
        Core::Pipeline pipeline;
        std::array<Core::DescriptorSet, 2> descriptorSets; // one for each input image
        Core::Buffer buffer;

        Core::Image inImg_0;
        Core::Image inImg_1;

        std::array<Core::Image, 7> outImgs;
    };

}
#endif // DOWNSAMPLE_HPP

View file

@ -1,86 +0,0 @@
#ifndef EPSILON_HPP
#define EPSILON_HPP
#include "pool/shaderpool.hpp"
#include "core/buffer.hpp"
#include "core/commandbuffer.hpp"
#include "core/descriptorpool.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/shadermodule.hpp"
#include "core/device.hpp"
#include <array>
namespace LSFG::Shaderchains {
///
/// Shader chain epsilon.
///
/// Takes three 8-bit RGBA textures, a fourth 8-bit R texture, an optional fifth
/// half-res 16-bit RGBA texture and produces a full-res 16-bit RGBA texture.
///
class Epsilon {
public:
Epsilon() = default;
///
/// Initialize the shaderchain.
///
/// @param device The Vulkan device to create the resources on.
/// @param shaderpool The shader pool to use for shader modules.
/// @param pool The descriptor pool to use for descriptor sets.
/// @param inImgs1 The first set of input images to process.
/// @param inImg2 The second type image to process.
/// @param optImg An optional additional input from the previous pass.
/// @param genc Amount of frames to generate.
///
/// @throws LSFG::vulkan_error if resource creation fails.
///
Epsilon(const Core::Device& device, Pool::ShaderPool& shaderpool,
const Core::DescriptorPool& pool,
std::array<Core::Image, 3> inImgs1,
Core::Image inImg2,
std::optional<Core::Image> optImg,
size_t genc);
///
/// Dispatch the shaderchain.
///
/// @param buf The command buffer to use for dispatching.
/// @param pass The pass number.
///
/// @throws std::logic_error if the command buffer is not recording.
///
void Dispatch(const Core::CommandBuffer& buf, uint64_t pass);
/// Get the output image.
[[nodiscard]] const auto& getOutImage() const { return this->outImg; }
/// Trivially copyable, moveable and destructible
Epsilon(const Epsilon&) noexcept = default;
Epsilon& operator=(const Epsilon&) noexcept = default;
Epsilon(Epsilon&&) noexcept = default;
Epsilon& operator=(Epsilon&&) noexcept = default;
~Epsilon() = default;
private:
std::array<Core::ShaderModule, 4> shaderModules;
std::array<Core::Pipeline, 4> pipelines;
std::array<Core::DescriptorSet, 3> descriptorSets;
std::vector<Core::DescriptorSet> nDescriptorSets;
std::vector<Core::Buffer> buffers;
std::array<Core::Image, 3> inImgs1;
Core::Image inImg2;
std::optional<Core::Image> optImg;
std::array<Core::Image, 4> tempImgs1;
std::array<Core::Image, 4> tempImgs2;
Core::Image outImg;
};
}
#endif // EPSILON_HPP

View file

@ -1,81 +0,0 @@
#ifndef EXTRACT_HPP
#define EXTRACT_HPP
#include "pool/shaderpool.hpp"
#include "core/buffer.hpp"
#include "core/commandbuffer.hpp"
#include "core/descriptorpool.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/shadermodule.hpp"
#include "core/device.hpp"
namespace LSFG::Shaderchains {
///
/// Shader chain extract.
///
/// Takes two half-res 16-bit RGBA textures, producing
/// an full-res 8-bit RGBA texture.
///
class Extract {
public:
Extract() = default;
///
/// Initialize the shaderchain.
///
/// @param device The Vulkan device to create the resources on.
/// @param shaderpool The shader pool to use for shader modules.
/// @param pool The descriptor pool to use for descriptor sets.
/// @param inImg1 The first set of input images to process.
/// @param inImg2 The second type image to process.
/// @param outExtent The extent of the output image.
/// @param genc Amount of frames to generate.
///
/// @throws LSFG::vulkan_error if resource creation fails.
///
Extract(const Core::Device& device, Pool::ShaderPool& shaderpool,
const Core::DescriptorPool& pool,
Core::Image inImg1,
Core::Image inImg2,
VkExtent2D outExtent,
size_t genc);
///
/// Dispatch the shaderchain.
///
/// @param buf The command buffer to use for dispatching.
/// @param pass The pass number.
///
/// @throws std::logic_error if the command buffer is not recording.
///
void Dispatch(const Core::CommandBuffer& buf, uint64_t pass);
/// Get the output image.
[[nodiscard]] const auto& getOutImage() const { return this->outImg; }
/// Trivially copyable, moveable and destructible
Extract(const Extract&) noexcept = default;
Extract& operator=(const Extract&) noexcept = default;
Extract(Extract&&) noexcept = default;
Extract& operator=(Extract&&) noexcept = default;
~Extract() = default;
private:
Core::ShaderModule shaderModule;
Core::Pipeline pipeline;
std::vector<Core::DescriptorSet> nDescriptorSets;
std::vector<Core::Buffer> buffers;
Core::Image inImg1;
Core::Image inImg2;
Core::Image whiteImg;
Core::Image outImg;
};
}
#endif // EXTRACT_HPP

View file

@ -1,106 +0,0 @@
#ifndef GAMMA_HPP
#define GAMMA_HPP
#include "pool/shaderpool.hpp"
#include "core/buffer.hpp"
#include "core/commandbuffer.hpp"
#include "core/descriptorpool.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/shadermodule.hpp"
#include "core/device.hpp"
#include <array>
namespace LSFG::Shaderchains {

    ///
    /// Shader chain gamma.
    ///
    /// Takes four temporal 8-bit RGBA images, as well as four output images from a given alpha stage.
    /// Also takes the corresponding (smallest if oob) output image from the beta pass.
    /// On non-first passes optionally takes 2 output images from previous gamma pass.
    /// Creates two images, one at twice the resolution of input images and the other with R16G16B16A16_FLOAT.
    ///
    class Gamma {
    public:
        Gamma() = default;

        ///
        /// Set up the shaderchain.
        ///
        /// @param device     Vulkan device the resources are created on.
        /// @param shaderpool Shader pool providing the shader modules.
        /// @param pool       Descriptor pool to allocate in.
        /// @param inImgs1_0  The next input images to process (when fc % 3 == 0).
        /// @param inImgs1_1  The prev input images to process (when fc % 3 == 0).
        /// @param inImgs1_2  Initially unprocessed prev prev input images (when fc % 3 == 0).
        /// @param inImg2     The second input image to process, next step up the resolution.
        /// @param optImg1    Optional additional input from the previous pass.
        /// @param optImg2    Optional additional input image for processing non-first passes.
        /// @param outExtent  Extent of the output image.
        /// @param genc       Amount of frames to generate.
        ///
        /// @throws LSFG::vulkan_error if resource creation fails.
        ///
        Gamma(const Core::Device& device, Pool::ShaderPool& shaderpool,
              const Core::DescriptorPool& pool,
              std::array<Core::Image, 4> inImgs1_0,
              std::array<Core::Image, 4> inImgs1_1,
              std::array<Core::Image, 4> inImgs1_2,
              Core::Image inImg2,
              std::optional<Core::Image> optImg1,
              std::optional<Core::Image> optImg2,
              VkExtent2D outExtent,
              size_t genc);

        ///
        /// Record the shaderchain into a command buffer.
        ///
        /// @param buf  Command buffer used for dispatching.
        /// @param fc   Frame count, used to select the input images.
        /// @param pass Pass number.
        ///
        /// @throws std::logic_error if the command buffer is not recording.
        ///
        void Dispatch(const Core::CommandBuffer& buf, uint64_t fc, uint64_t pass);

        /// Access the first output image.
        [[nodiscard]] const auto& getOutImage1() const { return outImg1; }
        /// Access the second output image.
        [[nodiscard]] const auto& getOutImage2() const { return outImg2; }

        /// Copy, move and destruction are compiler-generated.
        /// NOTE(review): copying is declared noexcept, yet the std::vector
        /// members allocate when copied - confirm this is intended.
        Gamma(const Gamma&) noexcept = default;
        Gamma(Gamma&&) noexcept = default;
        Gamma& operator=(const Gamma&) noexcept = default;
        Gamma& operator=(Gamma&&) noexcept = default;
        ~Gamma() = default;

    private:
        std::array<Core::ShaderModule, 6> shaderModules;
        std::array<Core::Pipeline, 6> pipelines;
        std::array<Core::DescriptorSet, 3> descriptorSets; // first shader has special logic
        std::vector<Core::DescriptorSet> n1DescriptorSets;
        std::vector<Core::DescriptorSet> n2DescriptorSets;
        std::vector<std::array<Core::DescriptorSet, 3>> nSpecialDescriptorSets;
        std::vector<Core::Buffer> buffers;

        std::array<Core::Image, 4> inImgs1_0;
        std::array<Core::Image, 4> inImgs1_1;
        std::array<Core::Image, 4> inImgs1_2;
        Core::Image inImg2;
        Core::Image optImg1; // specified or created black
        std::optional<Core::Image> optImg2;

        std::array<Core::Image, 4> tempImgs1;
        std::array<Core::Image, 4> tempImgs2;
        Core::Image whiteImg;

        Core::Image outImg1;
        Core::Image outImg2;
    };

}
#endif // GAMMA_HPP

View file

@ -1,98 +0,0 @@
#ifndef MAGIC_HPP
#define MAGIC_HPP
#include "pool/shaderpool.hpp"
#include "core/buffer.hpp"
#include "core/commandbuffer.hpp"
#include "core/descriptorpool.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/shadermodule.hpp"
#include "core/device.hpp"
#include <array>
namespace LSFG::Shaderchains {

    ///
    /// Shader chain magic.
    ///
    /// Takes textures similar to the gamma shader chain and produces
    /// intermediary results in three groups (stored as 2, 3 and 3 images).
    ///
    class Magic {
    public:
        Magic() = default;

        ///
        /// Set up the shaderchain.
        ///
        /// @param device     Vulkan device the resources are created on.
        /// @param shaderpool Shader pool providing the shader modules.
        /// @param pool       Descriptor pool used for descriptor sets.
        /// @param inImgs1_0  The next input images to process (when fc % 3 == 0).
        /// @param inImgs1_1  The prev input images to process (when fc % 3 == 0).
        /// @param inImgs1_2  Initially unprocessed prev prev input images (when fc % 3 == 0).
        /// @param inImg2     The second input image to process.
        /// @param inImg3     The third input image to process, next step up the resolution.
        /// @param optImg     Optional additional input from the previous pass.
        /// @param genc       Amount of frames to generate.
        ///
        /// @throws LSFG::vulkan_error if resource creation fails.
        ///
        Magic(const Core::Device& device, Pool::ShaderPool& shaderpool,
              const Core::DescriptorPool& pool,
              std::array<Core::Image, 4> inImgs1_0,
              std::array<Core::Image, 4> inImgs1_1,
              std::array<Core::Image, 4> inImgs1_2,
              Core::Image inImg2,
              Core::Image inImg3,
              std::optional<Core::Image> optImg,
              size_t genc);

        ///
        /// Record the shaderchain into a command buffer.
        ///
        /// @param buf  Command buffer used for dispatching.
        /// @param fc   Frame count, used to select the input images.
        /// @param pass Pass number.
        ///
        /// @throws std::logic_error if the command buffer is not recording.
        ///
        void Dispatch(const Core::CommandBuffer& buf, uint64_t fc, uint64_t pass);

        /// Access the first set of output images
        [[nodiscard]] const auto& getOutImages1() const { return outImgs1; }
        /// Access the second set of output images
        [[nodiscard]] const auto& getOutImages2() const { return outImgs2; }
        /// Access the third set of output images
        [[nodiscard]] const auto& getOutImages3() const { return outImgs3; }

        /// Copy, move and destruction are compiler-generated.
        /// NOTE(review): copying is declared noexcept, yet the std::vector
        /// members allocate when copied - confirm this is intended.
        Magic(const Magic&) noexcept = default;
        Magic(Magic&&) noexcept = default;
        Magic& operator=(const Magic&) noexcept = default;
        Magic& operator=(Magic&&) noexcept = default;
        ~Magic() = default;

    private:
        Core::ShaderModule shaderModule;
        Core::Pipeline pipeline;
        std::vector<std::array<Core::DescriptorSet, 3>> nDescriptorSets;
        std::vector<Core::Buffer> buffers;

        std::array<Core::Image, 4> inImgs1_0;
        std::array<Core::Image, 4> inImgs1_1;
        std::array<Core::Image, 4> inImgs1_2;
        Core::Image inImg2;
        Core::Image inImg3;
        std::optional<Core::Image> optImg;

        std::array<Core::Image, 2> outImgs1;
        std::array<Core::Image, 3> outImgs2;
        std::array<Core::Image, 3> outImgs3;
    };

}
#endif // MAGIC_HPP

View file

@ -1,91 +0,0 @@
#ifndef MERGE_HPP
#define MERGE_HPP
#include "pool/shaderpool.hpp"
#include "core/buffer.hpp"
#include "core/commandbuffer.hpp"
#include "core/descriptorpool.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/shadermodule.hpp"
#include "core/device.hpp"
#include <array>
namespace LSFG::Shaderchains {

    ///
    /// Shader chain merge.
    ///
    /// Takes the two previous frames as well as related resources
    /// and merges them into a new frame.
    ///
    class Merge {
    public:
        Merge() = default;

        ///
        /// Set up the shaderchain.
        ///
        /// @param device     Vulkan device the resources are created on.
        /// @param shaderpool Shader pool providing the shader modules.
        /// @param pool       Descriptor pool used for descriptor sets.
        /// @param inImg1     The prev full image when fc % 2 == 0
        /// @param inImg2     The next full image when fc % 2 == 0
        /// @param inImg3     The first related input texture
        /// @param inImg4     The second related input texture
        /// @param inImg5     The third related input texture
        /// @param outFds     File descriptors for the output images.
        /// @param genc       Amount of frames to generate.
        ///
        /// @throws LSFG::vulkan_error if resource creation fails.
        ///
        Merge(const Core::Device& device, Pool::ShaderPool& shaderpool,
              const Core::DescriptorPool& pool,
              Core::Image inImg1,
              Core::Image inImg2,
              Core::Image inImg3,
              Core::Image inImg4,
              Core::Image inImg5,
              const std::vector<int>& outFds,
              size_t genc);

        ///
        /// Record the shaderchain into a command buffer.
        ///
        /// @param buf  Command buffer used for dispatching.
        /// @param fc   Frame count, used to select the input images.
        /// @param pass Pass number.
        ///
        /// @throws std::logic_error if the command buffer is not recording.
        ///
        void Dispatch(const Core::CommandBuffer& buf, uint64_t fc, uint64_t pass);

        /// Access the output image of a pass; the bounds-checked lookup
        /// throws std::out_of_range for an invalid pass index.
        [[nodiscard]] const auto& getOutImage(size_t pass) const { return outImgs.at(pass); }

        /// Copy, move and destruction are compiler-generated.
        /// NOTE(review): copying is declared noexcept, yet the std::vector
        /// members allocate when copied - confirm this is intended.
        Merge(const Merge&) noexcept = default;
        Merge(Merge&&) noexcept = default;
        Merge& operator=(const Merge&) noexcept = default;
        Merge& operator=(Merge&&) noexcept = default;
        ~Merge() = default;

    private:
        Core::ShaderModule shaderModule;
        Core::Pipeline pipeline;
        std::vector<std::array<Core::DescriptorSet, 2>> nDescriptorSets; // per combo
        std::vector<Core::Buffer> buffers;

        Core::Image inImg1;
        Core::Image inImg2;
        Core::Image inImg3;
        Core::Image inImg4;
        Core::Image inImg5;

        std::vector<Core::Image> outImgs;
    };

}
#endif // MERGE_HPP

View file

@ -1,86 +0,0 @@
#ifndef ZETA_HPP
#define ZETA_HPP
#include "pool/shaderpool.hpp"
#include "core/buffer.hpp"
#include "core/commandbuffer.hpp"
#include "core/descriptorpool.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/shadermodule.hpp"
#include "core/device.hpp"
#include <array>
namespace LSFG::Shaderchains {
///
/// Shader chain zeta.
///
/// Takes three 8-bit RGBA textures, a fourth 8-bit R texture, a fifth
/// half-res 16-bit RGBA texture and produces a full-res 16-bit RGBA texture.
///
class Zeta {
public:
Zeta() = default;
///
/// Initialize the shaderchain.
///
/// @param device The Vulkan device to create the resources on.
/// @param shaderpool The shader pool to use for shader modules.
/// @param pool The descriptor pool to use for descriptor sets.
/// @param inImgs1 The first set of input images to process.
/// @param inImg2 The second type image to process.
/// @param inImg3 The third type image to process.
/// @param genc The amount of frames to generate.
///
/// @throws LSFG::vulkan_error if resource creation fails.
///
Zeta(const Core::Device& device, Pool::ShaderPool& shaderpool,
const Core::DescriptorPool& pool,
std::array<Core::Image, 3> inImgs1,
Core::Image inImg2,
Core::Image inImg3,
size_t genc);
///
/// Dispatch the shaderchain.
///
/// @param buf The command buffer to use for dispatching.
/// @param pass The pass number.
///
/// @throws std::logic_error if the command buffer is not recording.
///
void Dispatch(const Core::CommandBuffer& buf, uint64_t pass);
/// Get the output image.
[[nodiscard]] const auto& getOutImage() const { return this->outImg; }
/// Trivially copyable, moveable and destructible
Zeta(const Zeta&) noexcept = default;
Zeta& operator=(const Zeta&) noexcept = default;
Zeta(Zeta&&) noexcept = default;
Zeta& operator=(Zeta&&) noexcept = default;
~Zeta() = default;
private:
std::array<Core::ShaderModule, 4> shaderModules;
std::array<Core::Pipeline, 4> pipelines;
std::array<Core::DescriptorSet, 3> descriptorSets;
std::vector<Core::DescriptorSet> nDescriptorSets;
std::vector<Core::Buffer> buffers;
std::array<Core::Image, 3> inImgs1;
Core::Image inImg2;
Core::Image inImg3;
std::array<Core::Image, 4> tempImgs1;
std::array<Core::Image, 4> tempImgs2;
Core::Image outImg;
};
}
#endif // ZETA_HPP

View file

@ -1,184 +0,0 @@
#include "context.hpp"
#include "core/fence.hpp"
#include "core/semaphore.hpp"
#include "pool/shaderpool.hpp"
#include "lsfg.hpp"
#include <vulkan/vulkan_core.h>
#include <format>
#include <optional>
using namespace LSFG;
///
/// Build one frame-generation context: import the two input images and
/// create every shader chain, wiring each chain's outputs to the next.
///
/// @param device Vulkan device used to create all resources.
/// @param shaderpool Shader pool the chains fetch their modules and pipelines from.
/// @param width Width of the input images.
/// @param height Height of the input images.
/// @param in0 Handle forwarded to Core::Image for the first input image
///            (presumably an exported memory file descriptor - TODO confirm).
/// @param in1 Handle forwarded to Core::Image for the second input image.
/// @param outN Handles forwarded to the merge chain; outN.size() is also passed
///             to the chains as the number of generated frames per input frame.
///
Context::Context(const Core::Device& device, Pool::ShaderPool& shaderpool,
uint32_t width, uint32_t height, int in0, int in1,
const std::vector<int>& outN) {
// LSFG_HDR being set (to any value, even empty) selects the 16-bit float format
const VkFormat format = getenv("LSFG_HDR") == nullptr
? VK_FORMAT_R8G8B8A8_UNORM
: VK_FORMAT_R16G16B16A16_SFLOAT;
// import images
this->inImg_0 = Core::Image(device, { width, height },
format,
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
VK_IMAGE_ASPECT_COLOR_BIT,
in0);
this->inImg_1 = Core::Image(device, { width, height },
format,
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
VK_IMAGE_ASPECT_COLOR_BIT,
in1);
// prepare render infos
this->descPool = Core::DescriptorPool(device);
this->cmdPool = Core::CommandPool(device);
// eight render-info slots (present() cycles through them with frameIdx % 8),
// each holding one semaphore/command buffer entry per generated frame
for (size_t i = 0; i < 8; i++) {
auto& info = this->renderInfos.at(i);
info.internalSemaphores.resize(outN.size());
info.cmdBuffers2.resize(outN.size());
info.outSemaphores.resize(outN.size());
}
// create shader chains
this->downsampleChain = Shaderchains::Downsample(device, shaderpool, this->descPool,
this->inImg_0, this->inImg_1, outN.size());
// one alpha chain per downsample output level
for (size_t i = 0; i < 7; i++)
this->alphaChains.at(i) = Shaderchains::Alpha(device, shaderpool, this->descPool,
this->downsampleChain.getOutImages().at(i));
// beta only consumes the three output sets of the first alpha chain
this->betaChain = Shaderchains::Beta(device, shaderpool, this->descPool,
this->alphaChains.at(0).getOutImages0(),
this->alphaChains.at(0).getOutImages1(),
this->alphaChains.at(0).getOutImages2(), outN.size());
// seven stages walking the alpha chains from index 6 down to 0:
// stages 0-3 are gamma chains, stages 4-6 are magic/delta/epsilon/zeta (+extract)
for (size_t i = 0; i < 7; i++) {
if (i < 4) {
// each gamma chain after the first is fed the previous gamma chain's outputs;
// the beta output index is clamped to 5 for i == 0
this->gammaChains.at(i) = Shaderchains::Gamma(device, shaderpool, this->descPool,
this->alphaChains.at(6 - i).getOutImages0(),
this->alphaChains.at(6 - i).getOutImages1(),
this->alphaChains.at(6 - i).getOutImages2(),
this->betaChain.getOutImages().at(std::min(5UL, 6 - i)),
i == 0 ? std::nullopt
: std::optional{this->gammaChains.at(i - 1).getOutImage2()},
i == 0 ? std::nullopt
: std::optional{this->gammaChains.at(i - 1).getOutImage1()},
this->alphaChains.at(6 - i - 1).getOutImages0().at(0).getExtent(),
outN.size()
);
} else {
// stage 4 continues from the last gamma chain; stages 5 and 6 continue
// from the previous stage's extract, zeta and epsilon outputs
this->magicChains.at(i - 4) = Shaderchains::Magic(device, shaderpool, this->descPool,
this->alphaChains.at(6 - i).getOutImages0(),
this->alphaChains.at(6 - i).getOutImages1(),
this->alphaChains.at(6 - i).getOutImages2(),
i == 4 ? this->gammaChains.at(i - 1).getOutImage2()
: this->extractChains.at(i - 5).getOutImage(),
i == 4 ? this->gammaChains.at(i - 1).getOutImage1()
: this->zetaChains.at(i - 5).getOutImage(),
i == 4 ? std::nullopt : std::optional{this->epsilonChains.at(i - 5).getOutImage()},
outN.size()
);
// delta and epsilon optionally take the previous stage's own output (none at stage 4)
this->deltaChains.at(i - 4) = Shaderchains::Delta(device, shaderpool, this->descPool,
this->magicChains.at(i - 4).getOutImages1(),
i == 4 ? std::nullopt
: std::optional{this->deltaChains.at(i - 5).getOutImage()},
outN.size()
);
this->epsilonChains.at(i - 4) = Shaderchains::Epsilon(device, shaderpool, this->descPool,
this->magicChains.at(i - 4).getOutImages2(),
this->betaChain.getOutImages().at(6 - i),
i == 4 ? std::nullopt
: std::optional{this->epsilonChains.at(i - 5).getOutImage()},
outN.size()
);
this->zetaChains.at(i - 4) = Shaderchains::Zeta(device, shaderpool, this->descPool,
this->magicChains.at(i - 4).getOutImages3(),
i == 4 ? this->gammaChains.at(i - 1).getOutImage1()
: this->zetaChains.at(i - 5).getOutImage(),
this->betaChain.getOutImages().at(6 - i),
outN.size()
);
if (i >= 6)
continue; // no extract for i >= 6
this->extractChains.at(i - 4) = Shaderchains::Extract(device, shaderpool, this->descPool,
this->zetaChains.at(i - 4).getOutImage(),
this->epsilonChains.at(i - 4).getOutImage(),
this->alphaChains.at(6 - i - 1).getOutImages0().at(0).getExtent(),
outN.size()
);
}
}
// the merge chain combines both input images with the final (index 2)
// zeta, epsilon and delta outputs
this->mergeChain = Shaderchains::Merge(device, shaderpool, this->descPool,
this->inImg_1,
this->inImg_0,
this->zetaChains.at(2).getOutImage(),
this->epsilonChains.at(2).getOutImage(),
this->deltaChains.at(2).getOutImage(),
outN,
outN.size()
);
}
///
/// Record and submit the work for one input frame.
///
/// One command buffer runs the downsample and alpha chains; afterwards one
/// command buffer per entry of outSem runs the remaining chains for that pass.
///
/// @param device Vulkan device whose compute queue receives the submissions.
/// @param inSem Handle wrapped into the semaphore the first submission waits on
///              (presumably an imported semaphore file descriptor - TODO confirm).
/// @param outSem One handle per generated frame, wrapped into the semaphore
///               signalled when that pass finishes.
///
/// @throws LSFG::vulkan_error if waiting on a fence of this slot's previous use fails.
///
void Context::present(const Core::Device& device, int inSem,
const std::vector<int>& outSem) {
// render infos are reused round-robin, eight frames apart
auto& info = this->renderInfos.at(this->frameIdx % 8);
// 3. wait for completion of previous frame in this slot
if (info.completionFences.has_value())
for (auto& fence : *info.completionFences)
if (!fence.wait(device, UINT64_MAX)) // should not take any time
throw vulkan_error(VK_ERROR_DEVICE_LOST, "Fence wait timed out");
// 1. downsample and process input image
info.inSemaphore = Core::Semaphore(device, inSem);
// fresh internal semaphores: one per pass, all signalled by the first submission
for (size_t i = 0; i < outSem.size(); i++)
info.internalSemaphores.at(i) = Core::Semaphore(device);
info.cmdBuffer1 = Core::CommandBuffer(device, this->cmdPool);
info.cmdBuffer1.begin();
this->downsampleChain.Dispatch(info.cmdBuffer1, this->frameIdx);
// alpha chains are dispatched from index 6 down to 0
for (size_t i = 0; i < 7; i++)
this->alphaChains.at(6 - i).Dispatch(info.cmdBuffer1, this->frameIdx);
info.cmdBuffer1.end();
info.cmdBuffer1.submit(device.getComputeQueue(), std::nullopt,
{ info.inSemaphore }, std::nullopt,
info.internalSemaphores, std::nullopt);
// 2. generate intermediary frames
// replaces the fences of the previous use of this slot with an empty list
info.completionFences.emplace();
for (size_t pass = 0; pass < outSem.size(); pass++) {
auto& completionFence = info.completionFences->emplace_back(device);
auto& outSemaphore = info.outSemaphores.at(pass);
outSemaphore = Core::Semaphore(device, outSem.at(pass));
auto& cmdBuffer2 = info.cmdBuffers2.at(pass);
cmdBuffer2 = Core::CommandBuffer(device, this->cmdPool);
cmdBuffer2.begin();
this->betaChain.Dispatch(cmdBuffer2, this->frameIdx, pass);
for (size_t i = 0; i < 4; i++)
this->gammaChains.at(i).Dispatch(cmdBuffer2, this->frameIdx, pass);
for (size_t i = 0; i < 3; i++) {
this->magicChains.at(i).Dispatch(cmdBuffer2, this->frameIdx, pass);
this->deltaChains.at(i).Dispatch(cmdBuffer2, pass);
this->epsilonChains.at(i).Dispatch(cmdBuffer2, pass);
this->zetaChains.at(i).Dispatch(cmdBuffer2, pass);
// only the first two stages have an extract chain
if (i < 2)
this->extractChains.at(i).Dispatch(cmdBuffer2, pass);
}
this->mergeChain.Dispatch(cmdBuffer2, this->frameIdx, pass);
cmdBuffer2.end();
// each pass waits on its own internal semaphore and signals its output
// semaphore; the fence is what the next use of this slot waits on
cmdBuffer2.submit(device.getComputeQueue(), completionFence,
{ info.internalSemaphores.at(pass) }, std::nullopt,
{ outSemaphore }, std::nullopt);
}
this->frameIdx++;
}
///
/// Create an error whose what() text is "<message> (error <numeric VkResult>)".
///
/// @param result Vulkan result code stored alongside the message.
/// @param message Human-readable description of the failure.
///
vulkan_error::vulkan_error(VkResult result, const std::string& message)
        : std::runtime_error(
              std::format("{} (error {})", message, static_cast<int32_t>(result))),
          result(result) {
}

/// Out-of-line defaulted destructor.
vulkan_error::~vulkan_error() noexcept = default;

View file

@ -1,100 +0,0 @@
#include "lsfg.hpp"
#include "core/device.hpp"
#include "core/instance.hpp"
#include "context.hpp"
#include "pool/shaderpool.hpp"
#include "utils/utils.hpp"
#include <cstdlib>
#include <ctime>
#include <optional>
#include <string>
#include <unordered_map>
using namespace LSFG;
// Library-wide state, created by LSFG::initialize() and released by LSFG::finalize().
namespace {
// Vulkan instance; empty until initialize() has run.
std::optional<Core::Instance> instance;
// Vulkan device created from the instance.
std::optional<Core::Device> device;
// Shader pool constructed from the resolved Lossless.dll path.
std::optional<Pool::ShaderPool> pool;
// Live contexts, keyed by the id returned from createContext().
std::unordered_map<int32_t, Context> contexts;
}
///
/// Initialize the library: create the Vulkan instance and device, open the
/// shader pool and set up the shared globals. Does nothing when an instance
/// or device already exists.
///
/// The DLL path is resolved in this order:
///   1. $LSFG_DLL_PATH
///   2. $XDG_DATA_HOME/Steam/steamapps/common/Lossless Scaling/Lossless.dll
///   3. $HOME/.local/share/Steam/steamapps/common/Lossless Scaling/Lossless.dll
///   4. "Lossless.dll" (relative path)
/// Unset and empty variables are skipped.
///
void LSFG::initialize() {
    if (instance.has_value() || device.has_value())
        return;

    // an environment variable only counts when it is set and non-empty
    const auto isSet = [](const char* value) {
        return value != nullptr && *value != '\0';
    };

    std::string dllPathStr;
    if (const char* overridePath = getenv("LSFG_DLL_PATH"); isSet(overridePath)) {
        dllPathStr = std::string(overridePath);
    } else if (const char* dataDir = getenv("XDG_DATA_HOME"); isSet(dataDir)) {
        dllPathStr = std::string(dataDir) +
            "/Steam/steamapps/common/Lossless Scaling/Lossless.dll";
    } else if (const char* homeDir = getenv("HOME"); isSet(homeDir)) {
        dllPathStr = std::string(homeDir) +
            "/.local/share/Steam/steamapps/common/Lossless Scaling/Lossless.dll";
    } else {
        dllPathStr = "Lossless.dll";
    }

    instance.emplace();
    device.emplace(*instance);
    pool.emplace(dllPathStr);

    Globals::initializeGlobals(*device);

    // seed the generator used for context ids
    std::srand(static_cast<uint32_t>(std::time(nullptr)));
}
///
/// Create a new context and register it under a fresh id.
///
/// @param width Width of the input images.
/// @param height Height of the input images.
/// @param in0 Handle of the first input image (forwarded to Context).
/// @param in1 Handle of the second input image (forwarded to Context).
/// @param outN Handles of the output images (forwarded to Context).
/// @return Id to pass to presentContext() and deleteContext().
///
/// @throws LSFG::vulkan_error if the library has not been initialized.
///
int32_t LSFG::createContext(uint32_t width, uint32_t height, int in0, int in1,
        const std::vector<int>& outN) {
    if (!instance.has_value() || !device.has_value() || !pool.has_value())
        throw LSFG::vulkan_error(VK_ERROR_INITIALIZATION_FAILED, "LSFG not initialized");

    // std::rand() may return an id that is already in use. emplace() would then
    // silently keep the old context, drop the new one and hand the caller an id
    // aliasing somebody else's context, so draw again until the id is free.
    int32_t id = std::rand();
    while (contexts.find(id) != contexts.end())
        id = std::rand();

    contexts.emplace(id, Context(*device, *pool, width, height, in0, in1, outN));
    return id;
}
///
/// Present a frame on the context registered under the given id.
///
/// @param id Id returned by createContext().
/// @param inSem Handle of the semaphore to wait on (forwarded to Context::present).
/// @param outSem Handles of the semaphores to signal, one per generated frame.
///
/// @throws LSFG::vulkan_error if the library is not initialized or the id is unknown.
///
void LSFG::presentContext(int32_t id, int inSem, const std::vector<int>& outSem) {
    const bool initialized =
        instance.has_value() && device.has_value() && pool.has_value();
    if (!initialized)
        throw LSFG::vulkan_error(VK_ERROR_INITIALIZATION_FAILED, "LSFG not initialized");

    const auto entry = contexts.find(id);
    if (entry == contexts.end())
        throw LSFG::vulkan_error(VK_ERROR_DEVICE_LOST, "No such context");

    entry->second.present(*device, inSem, outSem);
}
///
/// Destroy the context registered under the given id.
///
/// @param id Id returned by createContext().
///
/// @throws LSFG::vulkan_error if the library is not initialized or the id is unknown.
///
void LSFG::deleteContext(int32_t id) {
    const bool initialized =
        instance.has_value() && device.has_value() && pool.has_value();
    if (!initialized)
        throw LSFG::vulkan_error(VK_ERROR_INITIALIZATION_FAILED, "LSFG not initialized");

    const auto entry = contexts.find(id);
    if (entry == contexts.end())
        throw LSFG::vulkan_error(VK_ERROR_DEVICE_LOST, "No such context");

    // let the device go idle before the context is destroyed
    vkDeviceWaitIdle(device->handle());
    contexts.erase(entry);
}
void LSFG::finalize() {
if (!instance.has_value() || !device.has_value() || !pool.has_value())
return;
Globals::uninitializeGlobals();
vkDeviceWaitIdle(device->handle());
pool.reset();
device.reset();
instance.reset();
}

View file

@ -1,89 +0,0 @@
#include "pool/extract.hpp"
#include <dxbc/dxbc_modinfo.h>
#include <dxbc/dxbc_module.h>
#include <dxbc/dxbc_reader.h>
#include <dxvk/dxvk_compute.h>
#include <dxvk/dxvk_context.h>
#include <dxvk/dxvk_pipelayout.h>
#include <dxvk/dxvk_shader.h>
#include <pe-parse/parse.h>
#include <openssl/sha.h>
#include <openssl/evp.h>
#include <algorithm>
#include <stdexcept>
#include <vector>
using namespace LSFG;
using namespace LSFG::Pool;
namespace {
using ResourceMap = std::unordered_map<uint32_t, std::vector<uint8_t>>;
/// Compute the 32-bit FNV-1a hash of a byte sequence.
/// Used only as a lookup key, not for anything security related.
uint32_t fnv1a_hash(const std::vector<uint8_t>& data) {
    constexpr uint32_t offsetBasis = 0x811C9DC5;
    constexpr uint32_t prime = 0x01000193;

    uint32_t digest = offsetBasis;
    for (auto it = data.cbegin(); it != data.cend(); ++it)
        digest = (digest ^ *it) * prime; // xor the byte in, then multiply (wraps mod 2^32)
    return digest;
}
///
/// Resource iteration callback: copy every non-empty RT_RCDATA resource
/// into the ResourceMap passed through `data`, keyed by the FNV-1a hash of
/// its bytes. Other resources are ignored.
///
/// @param data Pointer to the ResourceMap to fill.
/// @param res Resource currently being visited.
/// @return Always 0.
///
int on_resource(void* data, const peparse::resource& res) {
    if (res.type != peparse::RT_RCDATA)
        return 0;
    if (res.buf == nullptr || res.buf->bufLen <= 0)
        return 0;

    std::vector<uint8_t> payload(res.buf->bufLen);
    std::copy_n(res.buf->buf, res.buf->bufLen, payload.data());

    const uint32_t key = fnv1a_hash(payload);
    auto& resources = *reinterpret_cast<ResourceMap*>(data);
    resources[key] = payload;
    return 0;
}
}
///
/// Parse the PE file at `path` and collect its resources into this->resources
/// via the on_resource callback.
///
/// @param path Path to the PE file.
///
/// @throws std::runtime_error if the file cannot be parsed.
///
Extractor::Extractor(const std::string& path) {
    peparse::parsed_pe* const image = peparse::ParsePEFromFile(path.c_str());
    if (image == nullptr)
        throw std::runtime_error("Unable to parse PE file: " + path);

    peparse::IterRsrc(image, on_resource, &this->resources);
    peparse::DestructParsedPE(image);
}
///
/// Look up an extracted resource by hash.
///
/// @param hash Key of the resource in this->resources.
/// @return A copy of the resource bytes.
///
/// @throws std::runtime_error if no resource is stored under that hash.
///
std::vector<uint8_t> Extractor::getResource(uint32_t hash) const {
    const auto entry = this->resources.find(hash);
    if (entry == this->resources.end())
        throw std::runtime_error("Resource not found.");
    return entry->second;
}
///
/// Translate a DXBC shader into SPIR-V using dxvk.
///
/// @param dxbc Raw DXBC bytes.
/// @return The patched code buffer copied into a byte vector.
///
std::vector<uint8_t> Pool::dxbcToSpirv(const std::vector<uint8_t>& dxbc) {
// compile the shader
dxvk::DxbcReader reader(reinterpret_cast<const char*>(dxbc.data()), dxbc.size());
dxvk::DxbcModule module(reader);
// default-initialized module info; the shader is compiled under the name "CS"
const dxvk::DxbcModuleInfo info{};
auto shader = module.compile(info, "CS");
// extract spir-v from d3d11 shader
auto code = shader->getRawCode();
// patch binding offsets
// each recorded binding location in the code is overwritten with the
// position of that binding in m_bindingOffsets (0, 1, 2, ...)
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
for (size_t i = 0; i < shader->m_bindingOffsets.size(); i++)
code.data()[shader->m_bindingOffsets.at(i).bindingOffset] = static_cast<uint8_t>(i); // NOLINT
#pragma clang diagnostic pop
// NOTE(review): assumes code.size() is a size in bytes, not in SPIR-V words - confirm against dxvk
std::vector<uint8_t> spirv(code.size());
std::copy_n(reinterpret_cast<uint8_t*>(code.data()),
code.size(), spirv.data());
return spirv;
}

View file

@ -1,80 +0,0 @@
#include "pool/shaderpool.hpp"
using namespace LSFG;
using namespace LSFG::Pool;
// Maps the shader names used by the shader chains to the hash under which
// ShaderPool::getShader() requests the shader's bytes from the extractor.
// Note that "gamma/1.spv".."gamma/4.spv" carry the same hashes as
// "zeta/0.spv".."zeta/3.spv", i.e. those names resolve to the same resources.
const std::unordered_map<std::string, uint32_t> SHADERS = {
{ "downsample.spv", 0xe365474d },
{ "alpha/0.spv", 0x35f63c83 },
{ "alpha/1.spv", 0x83e5240d },
{ "alpha/2.spv", 0x5d64d9f1 },
{ "alpha/3.spv", 0xad77afe1 },
{ "beta/0.spv", 0xa986ccbb },
{ "beta/1.spv", 0x60944cf5 },
{ "beta/2.spv", 0xb1c8f69b },
{ "beta/3.spv", 0x87cbe880 },
{ "beta/4.spv", 0xc2c5507d },
{ "gamma/0.spv", 0xccce9dab },
{ "gamma/1.spv", 0x7719e229 },
{ "gamma/2.spv", 0xfb1a7643 },
{ "gamma/3.spv", 0xe0553cd8 },
{ "gamma/4.spv", 0xf73c136f },
{ "gamma/5.spv", 0xa34959c },
{ "magic.spv", 0x443ea7a1 },
{ "delta/0.spv", 0x141daaac },
{ "delta/1.spv", 0x2a0ed691 },
{ "delta/2.spv", 0x23bdc583 },
{ "delta/3.spv", 0x52bc5e0f },
{ "epsilon/0.spv", 0x128eb7d7 },
{ "epsilon/1.spv", 0xbab811ad },
{ "epsilon/2.spv", 0x1d4b902d },
{ "epsilon/3.spv", 0x91236549 },
{ "zeta/0.spv", 0x7719e229 },
{ "zeta/1.spv", 0xfb1a7643 },
{ "zeta/2.spv", 0xe0553cd8 },
{ "zeta/3.spv", 0xf73c136f },
{ "extract.spv", 0xb6cb084a },
{ "merge.spv", 0xfc0aedfa }
};
///
/// Get the shader module for a shader name, translating and caching it on
/// first use.
///
/// @param device Vulkan device to create the module on.
/// @param name Shader name; must be a key of SHADERS.
/// @param types Descriptor counts/types handed to the module on creation.
///              Ignored when the shader is already cached.
/// @return The (possibly cached) shader module.
///
/// @throws std::runtime_error if the name is unknown, the resource is missing
///         or empty, or the translation yields no code.
///
Core::ShaderModule ShaderPool::getShader(
        const Core::Device& device, const std::string& name,
        const std::vector<std::pair<size_t, VkDescriptorType>>& types) {
    // fast path: already translated
    if (const auto cached = shaders.find(name); cached != shaders.end())
        return cached->second;

    // resolve the name to the hash of its resource
    const auto known = SHADERS.find(name);
    if (known == SHADERS.end())
        throw std::runtime_error("Shader not found: " + name);

    auto dxbc = this->extractor.getResource(known->second);
    if (dxbc.empty())
        throw std::runtime_error("Shader code is empty: " + name);

    // translate and wrap into a shader module
    auto spirv = dxbcToSpirv(dxbc);
    if (spirv.empty())
        throw std::runtime_error("Shader code translation failed: " + name);

    Core::ShaderModule shader(device, spirv, types);
    shaders[name] = shader;
    return shader;
}
///
/// Get the pipeline for a shader name, creating and caching it on first use.
///
/// The shader module is requested with an empty descriptor type list, which
/// getShader() only honours when the shader is not cached yet; callers in this
/// file always call getShader() with the real types first.
///
/// @param device Vulkan device to create the pipeline on.
/// @param name Shader name; must be a key of SHADERS.
/// @return The (possibly cached) pipeline.
///
Core::Pipeline ShaderPool::getPipeline(
        const Core::Device& device, const std::string& name) {
    if (const auto cached = pipelines.find(name); cached != pipelines.end())
        return cached->second;

    auto module = this->getShader(device, name, {});

    Core::Pipeline created(device, module);
    pipelines[name] = created;
    return created;
}

View file

@ -1,176 +0,0 @@
#include "shaderchains/alpha.hpp"
#include "utils/utils.hpp"
using namespace LSFG::Shaderchains;
///
/// Create the alpha shader chain: four passes that take one input image down
/// to quarter resolution, the last pass writing into one of three rotating
/// output image sets.
///
/// @param device Vulkan device to create resources on.
/// @param shaderpool Pool providing the "alpha/N.spv" modules and pipelines.
/// @param pool Descriptor pool to allocate the descriptor sets from.
/// @param inImg Input image of the first pass.
///
Alpha::Alpha(const Core::Device& device, Pool::ShaderPool& shaderpool,
        const Core::DescriptorPool& pool,
        Core::Image inImg)
        : inImg(std::move(inImg)) {
    this->shaderModules = {{
        shaderpool.getShader(device, "alpha/0.spv",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        shaderpool.getShader(device, "alpha/1.spv",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        shaderpool.getShader(device, "alpha/2.spv",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        shaderpool.getShader(device, "alpha/3.spv",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } })
    }};
    this->pipelines = {{
        shaderpool.getPipeline(device, "alpha/0.spv"),
        shaderpool.getPipeline(device, "alpha/1.spv"),
        shaderpool.getPipeline(device, "alpha/2.spv"),
        shaderpool.getPipeline(device, "alpha/3.spv")
    }};

    // passes 1-3 have one descriptor set each; the fourth pass has three,
    // one per rotating output image set
    for (size_t i = 0; i < 3; i++)
        this->descriptorSets.at(i) = Core::DescriptorSet(device, pool,
            this->shaderModules.at(i));
    for (size_t i = 0; i < 3; i++)
        this->specialDescriptorSets.at(i) = Core::DescriptorSet(device, pool,
            this->shaderModules.at(3));

    // intermediate images at half resolution (rounded up)
    const auto extent = this->inImg.getExtent();
    const VkExtent2D halfExtent = {
        .width = (extent.width + 1) >> 1,
        .height = (extent.height + 1) >> 1
    };
    for (size_t i = 0; i < 2; i++) {
        this->tempImgs1.at(i) = Core::Image(device,
            halfExtent,
            VK_FORMAT_R8G8B8A8_UNORM,
            VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
            VK_IMAGE_ASPECT_COLOR_BIT);
        this->tempImgs2.at(i) = Core::Image(device,
            halfExtent,
            VK_FORMAT_R8G8B8A8_UNORM,
            VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
            VK_IMAGE_ASPECT_COLOR_BIT);
    }

    // intermediate and output images at quarter resolution (rounded up)
    const VkExtent2D quarterExtent = {
        .width = (extent.width + 3) >> 2,
        .height = (extent.height + 3) >> 2
    };
    for (size_t i = 0; i < 4; i++) {
        this->tempImgs3.at(i) = Core::Image(device,
            quarterExtent,
            VK_FORMAT_R8G8B8A8_UNORM,
            VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
            VK_IMAGE_ASPECT_COLOR_BIT);
        // The output images are cleared below, so they additionally need the
        // transfer-destination *usage* bit. (An image *layout* enumerator was
        // OR-ed in here before, which is not a usage flag.)
        this->outImgs_0.at(i) = Core::Image(device,
            quarterExtent,
            VK_FORMAT_R8G8B8A8_UNORM,
            VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT
                | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
            VK_IMAGE_ASPECT_COLOR_BIT);
        this->outImgs_1.at(i) = Core::Image(device,
            quarterExtent,
            VK_FORMAT_R8G8B8A8_UNORM,
            VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT
                | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
            VK_IMAGE_ASPECT_COLOR_BIT);
        this->outImgs_2.at(i) = Core::Image(device,
            quarterExtent,
            VK_FORMAT_R8G8B8A8_UNORM,
            VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT
                | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
            VK_IMAGE_ASPECT_COLOR_BIT);
    }

    // pass 1: inImg -> tempImgs1
    this->descriptorSets.at(0).update(device)
        .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg)
        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
        .build();
    // pass 2: tempImgs1 -> tempImgs2
    this->descriptorSets.at(1).update(device)
        .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
        .build();
    // pass 3: tempImgs2 -> tempImgs3
    this->descriptorSets.at(2).update(device)
        .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs3)
        .build();
    // pass 4: tempImgs3 -> outImgs_{0,1,2}, one descriptor set per output set
    for (size_t fc = 0; fc < 3; fc++) {
        auto* outImgs = &this->outImgs_0;
        if (fc == 1) outImgs = &this->outImgs_1;
        else if (fc == 2) outImgs = &this->outImgs_2;
        this->specialDescriptorSets.at(fc).update(device)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs3)
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, *outImgs)
            .build();
    }

    // clear the output images (so they're not undefined)
    for (size_t i = 0; i < 4; i++) {
        Utils::clearImage(device, this->outImgs_0.at(i));
        Utils::clearImage(device, this->outImgs_1.at(i));
        Utils::clearImage(device, this->outImgs_2.at(i));
    }
}
///
/// Record the four alpha passes into a command buffer.
///
/// @param buf Command buffer to record into.
/// @param fc Frame counter; fc % 3 selects the output image set (and the
///           matching descriptor set) written by the fourth pass.
///
void Alpha::Dispatch(const Core::CommandBuffer& buf, uint64_t fc) {
    const auto halfSize = this->tempImgs1.at(0).getExtent();
    const auto quarterSize = this->tempImgs3.at(0).getExtent();

    // group counts: ceil(size / 8) in each dimension
    const uint32_t halfGroupsX = (halfSize.width + 7) >> 3;
    const uint32_t halfGroupsY = (halfSize.height + 7) >> 3;
    const uint32_t quarterGroupsX = (quarterSize.width + 7) >> 3;
    const uint32_t quarterGroupsY = (quarterSize.height + 7) >> 3;

    // pass 1: inImg -> tempImgs1
    Utils::BarrierBuilder(buf)
        .addW2R(this->inImg)
        .addR2W(this->tempImgs1)
        .build();
    this->pipelines.at(0).bind(buf);
    this->descriptorSets.at(0).bind(buf, this->pipelines.at(0));
    buf.dispatch(halfGroupsX, halfGroupsY, 1);

    // pass 2: tempImgs1 -> tempImgs2
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs1)
        .addR2W(this->tempImgs2)
        .build();
    this->pipelines.at(1).bind(buf);
    this->descriptorSets.at(1).bind(buf, this->pipelines.at(1));
    buf.dispatch(halfGroupsX, halfGroupsY, 1);

    // pass 3: tempImgs2 -> tempImgs3
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs2)
        .addR2W(this->tempImgs3)
        .build();
    this->pipelines.at(2).bind(buf);
    this->descriptorSets.at(2).bind(buf, this->pipelines.at(2));
    buf.dispatch(quarterGroupsX, quarterGroupsY, 1);

    // pass 4: tempImgs3 -> the output set picked by the frame counter
    const uint64_t slot = fc % 3;
    auto& target = (slot == 1) ? this->outImgs_1
                 : (slot == 2) ? this->outImgs_2
                 : this->outImgs_0;
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs3)
        .addR2W(target)
        .build();
    this->pipelines.at(3).bind(buf);
    this->specialDescriptorSets.at(slot).bind(buf, this->pipelines.at(3));
    buf.dispatch(quarterGroupsX, quarterGroupsY, 1);
}

View file

@ -1,189 +0,0 @@
#include "shaderchains/beta.hpp"
#include "utils/utils.hpp"
using namespace LSFG::Shaderchains;
///
/// Create the beta shader chain: five passes that combine three sets of four
/// input images into six output images of decreasing size.
///
/// @param device Vulkan device to create resources on.
/// @param shaderpool Pool providing the "beta/N.spv" modules and pipelines.
/// @param pool Descriptor pool to allocate the descriptor sets from.
/// @param inImgs_0 First of the three rotating input image sets.
/// @param inImgs_1 Second of the three rotating input image sets.
/// @param inImgs_2 Third of the three rotating input image sets.
/// @param genc Number of passes (generated frames); one uniform buffer and one
///             last-pass descriptor set is created per pass.
///
Beta::Beta(const Core::Device& device, Pool::ShaderPool& shaderpool,
const Core::DescriptorPool& pool,
std::array<Core::Image, 4> inImgs_0,
std::array<Core::Image, 4> inImgs_1,
std::array<Core::Image, 4> inImgs_2,
size_t genc)
: inImgs_0(std::move(inImgs_0)),
inImgs_1(std::move(inImgs_1)),
inImgs_2(std::move(inImgs_2)) {
// the first shader samples all three input sets (3 x 4 = 12 images)
this->shaderModules = {{
shaderpool.getShader(device, "beta/0.spv",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 8+4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
shaderpool.getShader(device, "beta/1.spv",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
shaderpool.getShader(device, "beta/2.spv",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
shaderpool.getShader(device, "beta/3.spv",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
shaderpool.getShader(device, "beta/4.spv",
{ { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
{ 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 6, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } })
}};
this->pipelines = {{
shaderpool.getPipeline(device, "beta/0.spv"),
shaderpool.getPipeline(device, "beta/1.spv"),
shaderpool.getPipeline(device, "beta/2.spv"),
shaderpool.getPipeline(device, "beta/3.spv"),
shaderpool.getPipeline(device, "beta/4.spv")
}};
// descriptorSets[0..2] belong to shaders 1..3 (passes two to four)
for (size_t i = 1; i < 4; i++)
this->descriptorSets.at(i - 1) = Core::DescriptorSet(device, pool,
this->shaderModules.at(i));
// three variants of the first pass, one per rotation of the input sets
for (size_t i = 0; i < 3; i++)
this->specialDescriptorSets.at(i) = Core::DescriptorSet(device, pool,
this->shaderModules.at(0));
// one descriptor set for the last pass per generated frame
for (size_t i = 0; i < genc; i++)
this->nDescriptorSets.emplace_back(device, pool,
this->shaderModules.at(4));
// one uniform buffer per generated frame; timestamp is (i + 1) / (genc + 1)
for (size_t i = 0; i < genc; i++) {
auto data = Globals::fgBuffer;
data.timestamp = static_cast<float>(i + 1) / static_cast<float>(genc + 1);
this->buffers.emplace_back(device, data, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
}
// intermediate images share the extent of the input images
const auto extent = this->inImgs_0.at(0).getExtent();
for (size_t i = 0; i < 2; i++) {
this->tempImgs1.at(i) = Core::Image(device,
extent,
VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
VK_IMAGE_ASPECT_COLOR_BIT);
this->tempImgs2.at(i) = Core::Image(device,
extent,
VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
VK_IMAGE_ASPECT_COLOR_BIT);
}
// six single-channel outputs, each dimension halved (rounding down) per level
for (size_t i = 0; i < 6; i++) {
this->outImgs.at(i) = Core::Image(device,
{ extent.width >> i, extent.height >> i },
VK_FORMAT_R8_UNORM,
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
VK_IMAGE_ASPECT_COLOR_BIT);
}
// rotate the roles of the three input sets with the frame counter:
// fc 0 -> next = set 0, prev = set 2, pprev = set 1
// fc 1 -> next = set 1, prev = set 0, pprev = set 2
// fc 2 -> next = set 2, prev = set 1, pprev = set 0
for (size_t fc = 0; fc < 3; fc++) {
auto* nextImgs = &this->inImgs_0;
auto* prevImgs = &this->inImgs_2;
auto* pprevImgs = &this->inImgs_1;
if (fc == 1) {
nextImgs = &this->inImgs_1;
prevImgs = &this->inImgs_0;
pprevImgs = &this->inImgs_2;
} else if (fc == 2) {
nextImgs = &this->inImgs_2;
prevImgs = &this->inImgs_1;
pprevImgs = &this->inImgs_0;
}
this->specialDescriptorSets.at(fc).update(device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, *pprevImgs)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, *prevImgs)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, *nextImgs)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
.build();
}
// passes two to four ping-pong between tempImgs1 and tempImgs2
this->descriptorSets.at(0).update(device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
.build();
this->descriptorSets.at(1).update(device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
.build();
this->descriptorSets.at(2).update(device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
.build();
// last pass: tempImgs2 -> outImgs, with the per-frame uniform buffer
for (size_t i = 0; i < genc; i++) {
this->nDescriptorSets.at(i).update(device)
.add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, this->buffers.at(i))
.add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs)
.build();
}
}
///
/// Record the five beta passes into a command buffer.
///
/// @param buf Command buffer to record into.
/// @param fc Frame counter; fc % 3 selects the first-pass descriptor set.
/// @param pass Index of the generated frame; selects the last-pass descriptor set.
///
void Beta::Dispatch(const Core::CommandBuffer& buf, uint64_t fc, uint64_t pass) {
    const auto size = this->tempImgs1.at(0).getExtent();

    // passes 1-4 use ceil(size / 8) groups, pass 5 uses ceil(size / 32)
    const uint32_t groupsX = (size.width + 7) >> 3;
    const uint32_t groupsY = (size.height + 7) >> 3;
    const uint32_t finalGroupsX = (size.width + 31) >> 5;
    const uint32_t finalGroupsY = (size.height + 31) >> 5;

    // pass 1: all three input sets -> tempImgs1
    Utils::BarrierBuilder(buf)
        .addW2R(this->inImgs_0)
        .addW2R(this->inImgs_1)
        .addW2R(this->inImgs_2)
        .addR2W(this->tempImgs1)
        .build();
    this->pipelines.at(0).bind(buf);
    this->specialDescriptorSets.at(fc % 3).bind(buf, this->pipelines.at(0));
    buf.dispatch(groupsX, groupsY, 1);

    // pass 2: tempImgs1 -> tempImgs2
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs1)
        .addR2W(this->tempImgs2)
        .build();
    this->pipelines.at(1).bind(buf);
    this->descriptorSets.at(0).bind(buf, this->pipelines.at(1));
    buf.dispatch(groupsX, groupsY, 1);

    // pass 3: tempImgs2 -> tempImgs1
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs2)
        .addR2W(this->tempImgs1)
        .build();
    this->pipelines.at(2).bind(buf);
    this->descriptorSets.at(1).bind(buf, this->pipelines.at(2));
    buf.dispatch(groupsX, groupsY, 1);

    // pass 4: tempImgs1 -> tempImgs2
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs1)
        .addR2W(this->tempImgs2)
        .build();
    this->pipelines.at(3).bind(buf);
    this->descriptorSets.at(2).bind(buf, this->pipelines.at(3));
    buf.dispatch(groupsX, groupsY, 1);

    // pass 5: tempImgs2 -> outImgs
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs2)
        .addR2W(this->outImgs)
        .build();
    this->pipelines.at(4).bind(buf);
    this->nDescriptorSets.at(pass).bind(buf, this->pipelines.at(4));
    buf.dispatch(finalGroupsX, finalGroupsY, 1);
}

View file

@ -1,145 +0,0 @@
#include "shaderchains/delta.hpp"
#include "utils/utils.hpp"
using namespace LSFG::Shaderchains;
///
/// Create the delta shader chain: three filter passes over two input images
/// followed by a per-frame pass that may also read an optional extra image.
///
/// @param device Vulkan device to create resources on.
/// @param shaderpool Pool providing the "delta/N.spv" modules and pipelines.
/// @param pool Descriptor pool to allocate the descriptor sets from.
/// @param inImgs The two input images of the first pass.
/// @param optImg Optional image sampled by the last pass; its absence is
///               reported to the shader through firstIterS.
/// @param genc Number of passes (generated frames); one uniform buffer and one
///             last-pass descriptor set is created per pass.
///
Delta::Delta(const Core::Device& device, Pool::ShaderPool& shaderpool,
const Core::DescriptorPool& pool,
std::array<Core::Image, 2> inImgs,
std::optional<Core::Image> optImg,
size_t genc)
: inImgs(std::move(inImgs)),
optImg(std::move(optImg)) {
this->shaderModules = {{
shaderpool.getShader(device, "delta/0.spv",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
shaderpool.getShader(device, "delta/1.spv",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
shaderpool.getShader(device, "delta/2.spv",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
shaderpool.getShader(device, "delta/3.spv",
{ { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 3, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } })
}};
this->pipelines = {{
shaderpool.getPipeline(device, "delta/0.spv"),
shaderpool.getPipeline(device, "delta/1.spv"),
shaderpool.getPipeline(device, "delta/2.spv"),
shaderpool.getPipeline(device, "delta/3.spv")
}};
// one descriptor set for each of the first three passes
for (size_t i = 0; i < 3; i++)
this->descriptorSets.at(i) = Core::DescriptorSet(device, pool,
this->shaderModules.at(i));
// one descriptor set for the last pass per generated frame
for (size_t i = 0; i < genc; i++)
this->nDescriptorSets.emplace_back(device, pool,
this->shaderModules.at(3));
// one uniform buffer per generated frame; timestamp is (i + 1) / (genc + 1)
// and firstIterS is set when there is no optional image
for (size_t i = 0; i < genc; i++) {
auto data = Globals::fgBuffer;
data.timestamp = static_cast<float>(i + 1) / static_cast<float>(genc + 1);
data.firstIterS = !this->optImg.has_value();
this->buffers.emplace_back(device, data, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
}
// all images of this chain share the extent of the input images
const auto extent = this->inImgs.at(0).getExtent();
for (size_t i = 0; i < 2; i++) {
this->tempImgs1.at(i) = Core::Image(device,
extent,
VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
VK_IMAGE_ASPECT_COLOR_BIT);
this->tempImgs2.at(i) = Core::Image(device,
extent,
VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
VK_IMAGE_ASPECT_COLOR_BIT);
}
// the single output image uses a 16-bit float format
this->outImg = Core::Image(device,
extent,
VK_FORMAT_R16G16B16A16_SFLOAT,
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
VK_IMAGE_ASPECT_COLOR_BIT);
// pass 1: inImgs -> tempImgs1
this->descriptorSets.at(0).update(device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
.build();
// pass 2: tempImgs1 -> tempImgs2
this->descriptorSets.at(1).update(device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
.build();
// pass 3: tempImgs2 -> tempImgs1
this->descriptorSets.at(2).update(device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
.build();
// pass 4: tempImgs1 (+ optional image) -> outImg, with two samplers
// and the per-frame uniform buffer
for (size_t i = 0; i < genc; i++) {
this->nDescriptorSets.at(i).update(device)
.add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, this->buffers.at(i))
.add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampEdge)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImg)
.build();
}
}
///
/// Record the four delta passes into a command buffer.
///
/// @param buf Command buffer to record into.
/// @param pass Index of the generated frame; selects the last-pass descriptor set.
///
void Delta::Dispatch(const Core::CommandBuffer& buf, uint64_t pass) {
    const auto size = this->tempImgs1.at(0).getExtent();

    // every pass uses ceil(size / 8) groups in each dimension
    const uint32_t groupsX = (size.width + 7) >> 3;
    const uint32_t groupsY = (size.height + 7) >> 3;

    // pass 1: inImgs -> tempImgs1
    Utils::BarrierBuilder(buf)
        .addW2R(this->inImgs)
        .addR2W(this->tempImgs1)
        .build();
    this->pipelines.at(0).bind(buf);
    this->descriptorSets.at(0).bind(buf, this->pipelines.at(0));
    buf.dispatch(groupsX, groupsY, 1);

    // pass 2: tempImgs1 -> tempImgs2
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs1)
        .addR2W(this->tempImgs2)
        .build();
    this->pipelines.at(1).bind(buf);
    this->descriptorSets.at(1).bind(buf, this->pipelines.at(1));
    buf.dispatch(groupsX, groupsY, 1);

    // pass 3: tempImgs2 -> tempImgs1
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs2)
        .addR2W(this->tempImgs1)
        .build();
    this->pipelines.at(2).bind(buf);
    this->descriptorSets.at(2).bind(buf, this->pipelines.at(2));
    buf.dispatch(groupsX, groupsY, 1);

    // pass 4: tempImgs1 (+ optional image) -> outImg
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs1)
        .addW2R(this->optImg)
        .addR2W(this->outImg)
        .build();
    this->pipelines.at(3).bind(buf);
    this->nDescriptorSets.at(pass).bind(buf, this->pipelines.at(3));
    buf.dispatch(groupsX, groupsY, 1);
}

View file

@ -1,60 +0,0 @@
#include "shaderchains/downsample.hpp"
#include "utils/utils.hpp"
using namespace LSFG::Shaderchains;
///
/// Create the downsample shader chain: a single pass that turns one of the
/// two input images into seven output images of decreasing size.
///
/// @param device Vulkan device to create resources on.
/// @param shaderpool Pool providing the "downsample.spv" module and pipeline.
/// @param pool Descriptor pool to allocate the descriptor sets from.
/// @param inImg_0 Input image used on even frame counters.
/// @param inImg_1 Input image used on odd frame counters.
/// @param genc Number of generated frames; sets the uniform timestamp to 1 / (genc + 1).
///
Downsample::Downsample(const Core::Device& device, Pool::ShaderPool& shaderpool,
        const Core::DescriptorPool& pool,
        Core::Image inImg_0, Core::Image inImg_1,
        size_t genc)
        : inImg_0(std::move(inImg_0)),
          inImg_1(std::move(inImg_1)) {
    this->shaderModule = shaderpool.getShader(device, "downsample.spv",
        { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
          { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
          { 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
          { 7, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } });
    this->pipeline = shaderpool.getPipeline(device, "downsample.spv");

    // one descriptor set per input image
    for (size_t slot = 0; slot < 2; ++slot)
        this->descriptorSets.at(slot) = Core::DescriptorSet(device, pool, this->shaderModule);

    // shared uniform buffer
    auto constants = Globals::fgBuffer;
    constants.timestamp = 1.0F / static_cast<float>(genc + 1);
    this->buffer = Core::Buffer(device, constants, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);

    // seven single-channel outputs, each dimension halved (rounding down) per level
    auto fullSize = this->inImg_0.getExtent();
    for (size_t level = 0; level < 7; ++level) {
        this->outImgs.at(level) = Core::Image(device,
            { fullSize.width >> level, fullSize.height >> level },
            VK_FORMAT_R8_UNORM,
            VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
            VK_IMAGE_ASPECT_COLOR_BIT);
    }

    // slot 0 samples inImg_0, slot 1 samples inImg_1; both write the same outputs
    for (size_t slot = 0; slot < 2; ++slot) {
        auto& source = (slot == 0) ? this->inImg_0 : this->inImg_1;
        this->descriptorSets.at(slot).update(device)
            .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, this->buffer)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, source)
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs)
            .build();
    }
}
///
/// Record the downsample pass into a command buffer.
///
/// @param buf Command buffer to record into.
/// @param fc Frame counter; even values read inImg_0, odd values read inImg_1.
///
void Downsample::Dispatch(const Core::CommandBuffer& buf, uint64_t fc) {
    auto fullSize = this->inImg_0.getExtent();

    // ceil(size / 64) groups in each dimension
    const uint32_t groupsX = (fullSize.width + 63) >> 6;
    const uint32_t groupsY = (fullSize.height + 63) >> 6;

    const uint64_t slot = fc % 2;
    auto& source = (slot == 0) ? this->inImg_0 : this->inImg_1;

    Utils::BarrierBuilder(buf)
        .addW2R(source)
        .addR2W(this->outImgs)
        .build();
    this->pipeline.bind(buf);
    this->descriptorSets.at(slot).bind(buf, this->pipeline);
    buf.dispatch(groupsX, groupsY, 1);
}

View file

@ -1,148 +0,0 @@
#include "shaderchains/epsilon.hpp"
#include "utils/utils.hpp"
using namespace LSFG::Shaderchains;
///
/// Set up the Epsilon shader chain.
///
/// Fetches the four "epsilon/N.spv" shaders and pipelines, allocates the
/// intermediate and output images and writes every descriptor set.
///
/// @param device Vulkan device used for all allocations.
/// @param shaderpool Pool the shader modules and pipelines are fetched from.
/// @param pool Descriptor pool the descriptor sets are allocated from.
/// @param inImgs1 Three images sampled by the first stage.
/// @param inImg2 Image sampled by the last stage.
/// @param optImg Optional image sampled by the last stage.
/// @param genc Number of per-pass descriptor sets and uniform buffers to create.
///
Epsilon::Epsilon(const Core::Device& device, Pool::ShaderPool& shaderpool,
        const Core::DescriptorPool& pool,
        std::array<Core::Image, 3> inImgs1,
        Core::Image inImg2,
        std::optional<Core::Image> optImg,
        size_t genc)
        : inImgs1(std::move(inImgs1)),
          inImg2(std::move(inImg2)),
          optImg(std::move(optImg)) {
    // shader modules together with their descriptor layouts
    this->shaderModules = {{
        shaderpool.getShader(device, "epsilon/0.spv",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 3, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        shaderpool.getShader(device, "epsilon/1.spv",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        shaderpool.getShader(device, "epsilon/2.spv",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        shaderpool.getShader(device, "epsilon/3.spv",
            { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
              { 2, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 6, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } })
    }};
    this->pipelines = {{
        shaderpool.getPipeline(device, "epsilon/0.spv"),
        shaderpool.getPipeline(device, "epsilon/1.spv"),
        shaderpool.getPipeline(device, "epsilon/2.spv"),
        shaderpool.getPipeline(device, "epsilon/3.spv")
    }};

    // one fixed descriptor set for each of the first three stages
    for (size_t stage = 0; stage < 3; ++stage) {
        this->descriptorSets.at(stage) = Core::DescriptorSet(device, pool,
            this->shaderModules.at(stage));
    }
    // the last stage gets one descriptor set per generated frame
    for (size_t gen = 0; gen < genc; ++gen) {
        this->nDescriptorSets.emplace_back(device, pool,
            this->shaderModules.at(3));
    }
    // per-frame uniform buffers, timestamps evenly spaced in (0, 1)
    for (size_t gen = 0; gen < genc; ++gen) {
        auto ubo = Globals::fgBuffer;
        ubo.timestamp = static_cast<float>(gen + 1) / static_cast<float>(genc + 1);
        this->buffers.emplace_back(device, ubo, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
    }

    // all images share the extent of the first input image
    const auto extent = this->inImgs1.at(0).getExtent();
    const auto makeImage = [&device, extent](VkFormat format) {
        return Core::Image(device,
            extent,
            format,
            VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
            VK_IMAGE_ASPECT_COLOR_BIT);
    };
    for (size_t idx = 0; idx < 4; ++idx) {
        this->tempImgs1.at(idx) = makeImage(VK_FORMAT_R8G8B8A8_UNORM);
        this->tempImgs2.at(idx) = makeImage(VK_FORMAT_R8G8B8A8_UNORM);
    }
    this->outImg = makeImage(VK_FORMAT_R16G16B16A16_SFLOAT);

    // stage 0: inImgs1 -> tempImgs1
    this->descriptorSets.at(0).update(device)
        .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs1)
        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
        .build();
    // stage 1: tempImgs1 -> tempImgs2
    this->descriptorSets.at(1).update(device)
        .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
        .build();
    // stage 2: tempImgs2 -> tempImgs1
    this->descriptorSets.at(2).update(device)
        .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
        .build();
    // stage 3: tempImgs1 + optImg + inImg2 -> outImg, once per generated frame
    for (size_t gen = 0; gen < genc; ++gen) {
        this->nDescriptorSets.at(gen).update(device)
            .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, this->buffers.at(gen))
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampEdge)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg2)
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImg)
            .build();
    }
}
///
/// Record the four Epsilon stages into a command buffer.
///
/// @param buf Command buffer to record into.
/// @param pass Index of the per-pass descriptor set used by the last stage.
///
void Epsilon::Dispatch(const Core::CommandBuffer& buf, uint64_t pass) {
    // every stage uses ceil(extent / 8) workgroups per axis
    const auto size = this->tempImgs1.at(0).getExtent();
    const uint32_t groupsX = (size.width + 7) >> 3;
    const uint32_t groupsY = (size.height + 7) >> 3;

    // bind pipeline `stage`, bind descriptor set `sets[slot]`, then dispatch
    const auto record = [&](size_t stage, auto& sets, uint64_t slot) {
        this->pipelines.at(stage).bind(buf);
        sets.at(slot).bind(buf, this->pipelines.at(stage));
        buf.dispatch(groupsX, groupsY, 1);
    };

    // stage 0: inImgs1 -> tempImgs1
    Utils::BarrierBuilder(buf)
        .addW2R(this->inImgs1)
        .addR2W(this->tempImgs1)
        .build();
    record(0, this->descriptorSets, 0);

    // stage 1: tempImgs1 -> tempImgs2
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs1)
        .addR2W(this->tempImgs2)
        .build();
    record(1, this->descriptorSets, 1);

    // stage 2: tempImgs2 -> tempImgs1
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs2)
        .addR2W(this->tempImgs1)
        .build();
    record(2, this->descriptorSets, 2);

    // stage 3: tempImgs1 + optImg + inImg2 -> outImg
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs1)
        .addW2R(this->optImg)
        .addW2R(this->inImg2)
        .addR2W(this->outImg)
        .build();
    record(3, this->nDescriptorSets, pass);
}

View file

@ -1,74 +0,0 @@
#include "shaderchains/extract.hpp"
#include "utils/utils.hpp"
using namespace LSFG::Shaderchains;
///
/// Set up the Extract shader chain.
///
/// Fetches "extract.spv" and its pipeline, allocates the white and output
/// images at the requested extent and writes one descriptor set per pass.
///
/// @param device Vulkan device used for all allocations.
/// @param shaderpool Pool the shader module and pipeline are fetched from.
/// @param pool Descriptor pool the descriptor sets are allocated from.
/// @param inImg1 First sampled input image.
/// @param inImg2 Second sampled input image.
/// @param outExtent Extent of the white image and the output image.
/// @param genc Number of per-pass descriptor sets and uniform buffers to create.
///
Extract::Extract(const Core::Device& device, Pool::ShaderPool& shaderpool,
        const Core::DescriptorPool& pool,
        Core::Image inImg1,
        Core::Image inImg2,
        VkExtent2D outExtent,
        size_t genc)
        : inImg1(std::move(inImg1)),
          inImg2(std::move(inImg2)) {
    this->shaderModule = shaderpool.getShader(device, "extract.spv",
        { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
          { 2, VK_DESCRIPTOR_TYPE_SAMPLER },
          { 3, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
          { 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } });
    this->pipeline = shaderpool.getPipeline(device, "extract.spv");

    // one descriptor set per generated frame
    for (size_t gen = 0; gen < genc; ++gen) {
        this->nDescriptorSets.emplace_back(device, pool,
            this->shaderModule);
    }
    // per-frame uniform buffers, timestamps evenly spaced in (0, 1)
    for (size_t gen = 0; gen < genc; ++gen) {
        auto ubo = Globals::fgBuffer;
        ubo.timestamp = static_cast<float>(gen + 1) / static_cast<float>(genc + 1);
        this->buffers.emplace_back(device, ubo, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
    }

    // the white image additionally needs TRANSFER_DST so it can be cleared below
    this->whiteImg = Core::Image(device,
        outExtent,
        VK_FORMAT_R8G8B8A8_UNORM,
        VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT
            | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
        VK_IMAGE_ASPECT_COLOR_BIT);
    this->outImg = Core::Image(device,
        outExtent,
        VK_FORMAT_R8G8B8A8_UNORM,
        VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
        VK_IMAGE_ASPECT_COLOR_BIT);

    // whiteImg + inImg1 + inImg2 -> outImg, once per generated frame
    for (size_t gen = 0; gen < genc; ++gen) {
        this->nDescriptorSets.at(gen).update(device)
            .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, this->buffers.at(gen))
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampEdge)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->whiteImg)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg1)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg2)
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImg)
            .build();
    }

    // clear the white image
    // NOTE(review): the `true` flag presumably selects a white fill - confirm in Utils::clearImage
    Utils::clearImage(device, this->whiteImg, true);
}
///
/// Record the extract compute pass into a command buffer.
///
/// @param buf Command buffer to record into.
/// @param pass Index of the per-pass descriptor set to bind.
///
void Extract::Dispatch(const Core::CommandBuffer& buf, uint64_t pass) {
    // ceil(extent / 8) workgroups per axis, measured on the white image
    const auto size = this->whiteImg.getExtent();
    const uint32_t groupsX = (size.width + 7) >> 3;
    const uint32_t groupsY = (size.height + 7) >> 3;

    // barriers for the three sampled inputs and the written output
    Utils::BarrierBuilder(buf)
        .addW2R(this->whiteImg)
        .addW2R(this->inImg1)
        .addW2R(this->inImg2)
        .addR2W(this->outImg)
        .build();

    this->pipeline.bind(buf);
    this->nDescriptorSets.at(pass).bind(buf, this->pipeline);
    buf.dispatch(groupsX, groupsY, 1);
}

View file

@ -1,272 +0,0 @@
#include "shaderchains/gamma.hpp"
#include "utils/utils.hpp"
using namespace LSFG::Shaderchains;
///
/// Set up the Gamma shader chain.
///
/// Fetches the six "gamma/N.spv" shaders and pipelines, allocates all
/// intermediate and output images and writes every descriptor set.
///
/// @param device Vulkan device used for all allocations.
/// @param shaderpool Pool the shader modules and pipelines are fetched from.
/// @param pool Descriptor pool the descriptor sets are allocated from.
/// @param inImgs1_0 Input images used as "next" when fc % 3 == 0.
/// @param inImgs1_1 Input images used as "next" when fc % 3 == 1.
/// @param inImgs1_2 Input images used as "next" when fc % 3 == 2.
/// @param inImg2 Image sampled by the fifth stage.
/// @param optImg1 Optional image sampled by the first stage. When absent, a
///                replacement image is allocated and cleared, and the uniform
///                buffers are flagged with firstIter.
/// @param optImg2 Optional image sampled by the first and fifth stage.
/// @param outExtent Extent of the white image and of the second output image.
/// @param genc Number of per-pass descriptor sets and uniform buffers to create.
///
Gamma::Gamma(const Core::Device& device, Pool::ShaderPool& shaderpool,
        const Core::DescriptorPool& pool,
        std::array<Core::Image, 4> inImgs1_0,
        std::array<Core::Image, 4> inImgs1_1,
        std::array<Core::Image, 4> inImgs1_2,
        Core::Image inImg2,
        std::optional<Core::Image> optImg1, // NOLINT
        std::optional<Core::Image> optImg2,
        VkExtent2D outExtent,
        size_t genc)
        : inImgs1_0(std::move(inImgs1_0)),
          inImgs1_1(std::move(inImgs1_1)),
          inImgs1_2(std::move(inImgs1_2)),
          inImg2(std::move(inImg2)),
          optImg2(std::move(optImg2)) {
    // shader modules together with their descriptor layouts
    this->shaderModules = {{
        shaderpool.getShader(device, "gamma/0.spv",
            { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
              { 2, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 10, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 3, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        shaderpool.getShader(device, "gamma/1.spv",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 3, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        shaderpool.getShader(device, "gamma/2.spv",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        shaderpool.getShader(device, "gamma/3.spv",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        shaderpool.getShader(device, "gamma/4.spv",
            { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
              { 2, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 6, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        shaderpool.getShader(device, "gamma/5.spv",
            { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
              { 2, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } })
    }};
    this->pipelines = {{
        shaderpool.getPipeline(device, "gamma/0.spv"),
        shaderpool.getPipeline(device, "gamma/1.spv"),
        shaderpool.getPipeline(device, "gamma/2.spv"),
        shaderpool.getPipeline(device, "gamma/3.spv"),
        shaderpool.getPipeline(device, "gamma/4.spv"),
        shaderpool.getPipeline(device, "gamma/5.spv")
    }};

    // stages 1..3 each own one fixed descriptor set (stored at index stage - 1)
    for (size_t i = 1; i < 4; i++)
        this->descriptorSets.at(i - 1) = Core::DescriptorSet(device, pool,
            this->shaderModules.at(i));
    // stages 4 and 5 get one descriptor set per generated frame
    for (size_t i = 0; i < genc; i++)
        this->n1DescriptorSets.emplace_back(device, pool,
            this->shaderModules.at(4));
    for (size_t i = 0; i < genc; i++)
        this->n2DescriptorSets.emplace_back(device, pool,
            this->shaderModules.at(5));
    // stage 0 gets three descriptor sets (one per fc % 3) per generated frame
    for (size_t i = 0; i < genc; i++) {
        this->nSpecialDescriptorSets.emplace_back();
        for (size_t j = 0; j < 3; j++)
            this->nSpecialDescriptorSets.at(i).at(j) = Core::DescriptorSet(device, pool,
                this->shaderModules.at(0));
    }

    // per-frame uniform buffers, timestamps evenly spaced in (0, 1)
    const bool firstIter = !optImg1.has_value();
    for (size_t i = 0; i < genc; i++) {
        auto data = Globals::fgBuffer;
        data.timestamp = static_cast<float>(i + 1) / static_cast<float>(genc + 1);
        data.firstIter = firstIter;
        this->buffers.emplace_back(device, data, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
    }

    const auto extent = this->inImgs1_0.at(0).getExtent();

    // Allocate the replacement image only when no optImg1 was supplied.
    // std::optional::value_or() would evaluate its argument unconditionally and
    // thereby create (and immediately discard) an image on every other path.
    if (firstIter) {
        this->optImg1 = Core::Image(device, extent,
            VK_FORMAT_R8G8B8A8_UNORM,
            VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT
                | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
            VK_IMAGE_ASPECT_COLOR_BIT);
    } else {
        this->optImg1 = Core::Image(*optImg1);
    }

    for (size_t i = 0; i < 4; i++) {
        this->tempImgs1.at(i) = Core::Image(device,
            extent,
            VK_FORMAT_R8G8B8A8_UNORM,
            VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
            VK_IMAGE_ASPECT_COLOR_BIT);
        this->tempImgs2.at(i) = Core::Image(device,
            extent,
            VK_FORMAT_R8G8B8A8_UNORM,
            VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
            VK_IMAGE_ASPECT_COLOR_BIT);
    }
    this->whiteImg = Core::Image(device, outExtent,
        VK_FORMAT_R8G8B8A8_UNORM,
        VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT
            | VK_IMAGE_USAGE_TRANSFER_DST_BIT,
        VK_IMAGE_ASPECT_COLOR_BIT);
    this->outImg1 = Core::Image(device,
        extent,
        VK_FORMAT_R16G16B16A16_SFLOAT,
        VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
        VK_IMAGE_ASPECT_COLOR_BIT);
    this->outImg2 = Core::Image(device,
        outExtent,
        VK_FORMAT_R8G8B8A8_UNORM,
        VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
        VK_IMAGE_ASPECT_COLOR_BIT);

    // stage 0: the "next"/"previous" input arrays rotate with fc % 3
    for (size_t fc = 0; fc < 3; fc++) {
        auto* nextImgs1 = &this->inImgs1_0;
        auto* prevImgs1 = &this->inImgs1_2;
        if (fc == 1) {
            nextImgs1 = &this->inImgs1_1;
            prevImgs1 = &this->inImgs1_0;
        } else if (fc == 2) {
            nextImgs1 = &this->inImgs1_2;
            prevImgs1 = &this->inImgs1_1;
        }
        for (size_t i = 0; i < genc; i++) {
            this->nSpecialDescriptorSets.at(i).at(fc).update(device)
                .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, this->buffers.at(i))
                .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
                .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampEdge)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, *prevImgs1)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, *nextImgs1)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg1)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg2)
                .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(0))
                .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(1))
                .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(2))
                .build();
        }
    }
    // stage 1: tempImgs1[0..2] -> tempImgs2
    this->descriptorSets.at(0).update(device)
        .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(0))
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(1))
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(2))
        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
        .build();
    // stage 2: tempImgs2 -> tempImgs1
    this->descriptorSets.at(1).update(device)
        .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
        .build();
    // stage 3: tempImgs1 -> tempImgs2
    this->descriptorSets.at(2).update(device)
        .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
        .build();
    for (size_t i = 0; i < genc; i++) {
        // stage 4: tempImgs2 + optImg2 + inImg2 -> outImg1
        this->n1DescriptorSets.at(i).update(device)
            .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, this->buffers.at(i))
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampEdge)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg2)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg2)
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImg1)
            .build();
        // stage 5: whiteImg + outImg1 -> outImg2
        this->n2DescriptorSets.at(i).update(device)
            .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, this->buffers.at(i))
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampEdge)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->whiteImg)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->outImg1)
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImg2)
            .build();
    }

    // clear white image and optImg1 if needed
    Utils::clearImage(device, this->whiteImg, true);
    if (firstIter)
        Utils::clearImage(device, this->optImg1);
}
///
/// Record the six Gamma stages into a command buffer.
///
/// @param buf Command buffer to record into.
/// @param fc Frame counter; fc % 3 selects the "next" and "previous" input arrays.
/// @param pass Index of the per-pass descriptor sets.
///
void Gamma::Dispatch(const Core::CommandBuffer& buf, uint64_t fc, uint64_t pass) {
    const auto size = this->tempImgs1.at(0).getExtent();

    // stages 0..4 use ceil(extent / 8) workgroups per axis,
    // stage 5 uses ceil(extent / 4)
    const uint32_t coarseX = (size.width + 7) >> 3;
    const uint32_t coarseY = (size.height + 7) >> 3;
    const uint32_t fineX = (size.width + 3) >> 2;
    const uint32_t fineY = (size.height + 3) >> 2;

    // "next" is the array at fc % 3, "previous" is the one before it (wrapping)
    const std::array ring{ &this->inImgs1_0, &this->inImgs1_1, &this->inImgs1_2 };
    const auto slot = static_cast<size_t>(fc % 3);
    auto* const upcoming = ring.at(slot);
    auto* const previous = ring.at((slot + 2) % 3);

    // bind pipeline `stage`, bind descriptor set `sets[idx]`, then dispatch
    const auto record = [&](size_t stage, auto& sets, uint64_t idx,
            uint32_t groupsX, uint32_t groupsY) {
        this->pipelines.at(stage).bind(buf);
        sets.at(idx).bind(buf, this->pipelines.at(stage));
        buf.dispatch(groupsX, groupsY, 1);
    };

    // stage 0: previous/next inputs + optImg1 + optImg2 -> tempImgs1[0..2]
    Utils::BarrierBuilder(buf)
        .addW2R(*previous)
        .addW2R(*upcoming)
        .addW2R(this->optImg1)
        .addW2R(this->optImg2)
        .addR2W(this->tempImgs1.at(0))
        .addR2W(this->tempImgs1.at(1))
        .addR2W(this->tempImgs1.at(2))
        .build();
    this->pipelines.at(0).bind(buf);
    this->nSpecialDescriptorSets.at(pass).at(fc % 3).bind(buf, this->pipelines.at(0));
    buf.dispatch(coarseX, coarseY, 1);

    // stage 1: tempImgs1[0..2] -> tempImgs2
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs1.at(0))
        .addW2R(this->tempImgs1.at(1))
        .addW2R(this->tempImgs1.at(2))
        .addR2W(this->tempImgs2)
        .build();
    record(1, this->descriptorSets, 0, coarseX, coarseY);

    // stage 2: tempImgs2 -> tempImgs1
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs2)
        .addR2W(this->tempImgs1)
        .build();
    record(2, this->descriptorSets, 1, coarseX, coarseY);

    // stage 3: tempImgs1 -> tempImgs2
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs1)
        .addR2W(this->tempImgs2)
        .build();
    record(3, this->descriptorSets, 2, coarseX, coarseY);

    // stage 4: tempImgs2 + optImg2 + inImg2 -> outImg1
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs2)
        .addW2R(this->optImg2)
        .addW2R(this->inImg2)
        .addR2W(this->outImg1)
        .build();
    record(4, this->n1DescriptorSets, pass, coarseX, coarseY);

    // stage 5: whiteImg + outImg1 -> outImg2
    Utils::BarrierBuilder(buf)
        .addW2R(this->whiteImg)
        .addW2R(this->outImg1)
        .addR2W(this->outImg2)
        .build();
    record(5, this->n2DescriptorSets, pass, fineX, fineY);
}

View file

@ -1,117 +0,0 @@
#include "shaderchains/magic.hpp"
#include "utils/utils.hpp"
using namespace LSFG::Shaderchains;
///
/// Set up the Magic shader chain.
///
/// Fetches "magic.spv" and its pipeline, allocates the three groups of output
/// images and writes three descriptor sets (one per fc % 3) for every pass.
///
/// @param device Vulkan device used for all allocations.
/// @param shaderpool Pool the shader module and pipeline are fetched from.
/// @param pool Descriptor pool the descriptor sets are allocated from.
/// @param inImgs1_0 Input images used as "next" when fc % 3 == 0.
/// @param inImgs1_1 Input images used as "next" when fc % 3 == 1.
/// @param inImgs1_2 Input images used as "next" when fc % 3 == 2.
/// @param inImg2 Additional sampled input image.
/// @param inImg3 Additional sampled input image.
/// @param optImg Optional sampled image; when absent the uniform buffers are
///               flagged with firstIterS.
/// @param genc Number of passes to create descriptor sets and buffers for.
///
Magic::Magic(const Core::Device& device, Pool::ShaderPool& shaderpool,
        const Core::DescriptorPool& pool,
        std::array<Core::Image, 4> inImgs1_0,
        std::array<Core::Image, 4> inImgs1_1,
        std::array<Core::Image, 4> inImgs1_2,
        Core::Image inImg2,
        Core::Image inImg3,
        std::optional<Core::Image> optImg,
        size_t genc)
        : inImgs1_0(std::move(inImgs1_0)),
          inImgs1_1(std::move(inImgs1_1)),
          inImgs1_2(std::move(inImgs1_2)),
          inImg2(std::move(inImg2)), inImg3(std::move(inImg3)),
          optImg(std::move(optImg)) {
    this->shaderModule = shaderpool.getShader(device, "magic.spv",
        { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
          { 2, VK_DESCRIPTOR_TYPE_SAMPLER },
          { 4+4+2+1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
          { 3+3+2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } });
    this->pipeline = shaderpool.getPipeline(device, "magic.spv");

    // three descriptor sets per generated frame, one for each value of fc % 3
    for (size_t gen = 0; gen < genc; ++gen) {
        this->nDescriptorSets.emplace_back();
        for (size_t phase = 0; phase < 3; ++phase) {
            this->nDescriptorSets.at(gen).at(phase) =
                Core::DescriptorSet(device, pool, this->shaderModule);
        }
    }
    // per-frame uniform buffers, timestamps evenly spaced in (0, 1)
    for (size_t gen = 0; gen < genc; ++gen) {
        auto ubo = Globals::fgBuffer;
        ubo.timestamp = static_cast<float>(gen + 1) / static_cast<float>(genc + 1);
        ubo.firstIterS = !this->optImg.has_value();
        this->buffers.emplace_back(device, ubo, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
    }

    // every output image has the extent of the first input image
    const auto extent = this->inImgs1_0.at(0).getExtent();
    const auto makeOutput = [&device, extent]() {
        return Core::Image(device,
            extent,
            VK_FORMAT_R8G8B8A8_UNORM,
            VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
            VK_IMAGE_ASPECT_COLOR_BIT);
    };
    for (size_t idx = 0; idx < 2; ++idx)
        this->outImgs1.at(idx) = makeOutput();
    for (size_t idx = 0; idx < 3; ++idx)
        this->outImgs2.at(idx) = makeOutput();
    for (size_t idx = 0; idx < 3; ++idx)
        this->outImgs3.at(idx) = makeOutput();

    // "next" is the array at index phase, "previous" the one before it (wrapping)
    const std::array ring{ &this->inImgs1_0, &this->inImgs1_1, &this->inImgs1_2 };
    for (size_t phase = 0; phase < 3; ++phase) {
        auto* const upcoming = ring.at(phase);
        auto* const previous = ring.at((phase + 2) % 3);
        for (size_t gen = 0; gen < genc; ++gen) {
            this->nDescriptorSets.at(gen).at(phase).update(device)
                .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, this->buffers.at(gen))
                .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
                .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampEdge)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, *previous)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, *upcoming)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg2)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg3)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg)
                .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs3)
                .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs2)
                .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs1)
                .build();
        }
    }
}
///
/// Record the magic compute pass into a command buffer.
///
/// @param buf Command buffer to record into.
/// @param fc Frame counter; fc % 3 selects the "next" and "previous" input arrays.
/// @param pass Index of the per-pass descriptor sets.
///
void Magic::Dispatch(const Core::CommandBuffer& buf, uint64_t fc, uint64_t pass) {
    // ceil(extent / 8) workgroups per axis
    const auto size = this->inImgs1_0.at(0).getExtent();
    const uint32_t groupsX = (size.width + 7) >> 3;
    const uint32_t groupsY = (size.height + 7) >> 3;

    // "next" is the array at fc % 3, "previous" is the one before it (wrapping)
    const std::array ring{ &this->inImgs1_0, &this->inImgs1_1, &this->inImgs1_2 };
    const auto slot = static_cast<size_t>(fc % 3);
    auto* const upcoming = ring.at(slot);
    auto* const previous = ring.at((slot + 2) % 3);

    // barriers for all sampled inputs and all written outputs
    Utils::BarrierBuilder(buf)
        .addW2R(*previous)
        .addW2R(*upcoming)
        .addW2R(this->inImg2)
        .addW2R(this->inImg3)
        .addW2R(this->optImg)
        .addR2W(this->outImgs3)
        .addR2W(this->outImgs2)
        .addR2W(this->outImgs1)
        .build();

    this->pipeline.bind(buf);
    this->nDescriptorSets.at(pass).at(fc % 3).bind(buf, this->pipeline);
    buf.dispatch(groupsX, groupsY, 1);
}

View file

@ -1,85 +0,0 @@
#include "shaderchains/merge.hpp"
#include "utils/utils.hpp"
using namespace LSFG::Shaderchains;
///
/// Set up the Merge shader chain.
///
/// Fetches "merge.spv" and its pipeline, creates one output image per pass
/// from the supplied file descriptors and writes two descriptor sets (one per
/// frame parity) for every pass.
///
/// @param device Vulkan device used for all allocations.
/// @param shaderpool Pool the shader module and pipeline are fetched from.
/// @param pool Descriptor pool the descriptor sets are allocated from.
/// @param inImg1 Sampled input; bound first on even frames, second on odd frames.
/// @param inImg2 Sampled input; bound second on even frames, first on odd frames.
/// @param inImg3 Sampled input image.
/// @param inImg4 Sampled input image.
/// @param inImg5 Sampled input image.
/// @param outFds File descriptors handed to the output image constructor, one
///               per pass; must hold at least genc entries.
/// @param genc Number of passes to create images, descriptor sets and buffers for.
///
Merge::Merge(const Core::Device& device, Pool::ShaderPool& shaderpool,
        const Core::DescriptorPool& pool,
        Core::Image inImg1,
        Core::Image inImg2,
        Core::Image inImg3,
        Core::Image inImg4,
        Core::Image inImg5,
        const std::vector<int>& outFds,
        size_t genc)
        : inImg1(std::move(inImg1)),
          inImg2(std::move(inImg2)),
          inImg3(std::move(inImg3)),
          inImg4(std::move(inImg4)),
          inImg5(std::move(inImg5)) {
    this->shaderModule = shaderpool.getShader(device, "merge.spv",
        { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
          { 2, VK_DESCRIPTOR_TYPE_SAMPLER },
          { 5, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
          { 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } });
    this->pipeline = shaderpool.getPipeline(device, "merge.spv");

    // two descriptor sets per generated frame, one for each frame parity
    for (size_t gen = 0; gen < genc; ++gen) {
        this->nDescriptorSets.emplace_back();
        for (size_t parity = 0; parity < 2; ++parity) {
            this->nDescriptorSets.at(gen).at(parity) =
                Core::DescriptorSet(device, pool, this->shaderModule);
        }
    }
    // per-frame uniform buffers, timestamps evenly spaced in (0, 1)
    for (size_t gen = 0; gen < genc; ++gen) {
        auto ubo = Globals::fgBuffer;
        ubo.timestamp = static_cast<float>(gen + 1) / static_cast<float>(genc + 1);
        this->buffers.emplace_back(device, ubo, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
    }

    auto extent = this->inImg1.getExtent();
    // the mere presence of the LSFG_HDR environment variable selects the float format
    const bool hdr = getenv("LSFG_HDR") != nullptr;
    const VkFormat format = hdr
        ? VK_FORMAT_R16G16B16A16_SFLOAT
        : VK_FORMAT_R8G8B8A8_UNORM;
    for (size_t gen = 0; gen < genc; ++gen) {
        this->outImgs.emplace_back(device,
            extent, format,
            VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
            VK_IMAGE_ASPECT_COLOR_BIT,
            outFds.at(gen));
    }

    for (size_t parity = 0; parity < 2; ++parity) {
        // inImg1 and inImg2 swap binding positions on odd frames
        const bool evenFrame = (parity % 2) == 0;
        auto& leading = evenFrame ? this->inImg1 : this->inImg2;
        auto& trailing = evenFrame ? this->inImg2 : this->inImg1;
        for (size_t gen = 0; gen < genc; ++gen) {
            this->nDescriptorSets.at(gen).at(parity).update(device)
                .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, this->buffers.at(gen))
                .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
                .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampEdge)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, leading)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, trailing)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg3)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg4)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg5)
                .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs.at(gen))
                .build();
        }
    }
}
///
/// Record the merge compute pass into a command buffer.
///
/// @param buf Command buffer to record into.
/// @param fc Frame counter; its parity selects the descriptor set.
/// @param pass Index of the output image and of the per-pass descriptor sets.
///
void Merge::Dispatch(const Core::CommandBuffer& buf, uint64_t fc, uint64_t pass) {
    // ceil(extent / 16) workgroups per axis
    const auto size = this->inImg1.getExtent();
    const uint32_t groupsX = (size.width + 15) >> 4;
    const uint32_t groupsY = (size.height + 15) >> 4;

    // barriers for the five sampled inputs and this pass's output image
    Utils::BarrierBuilder(buf)
        .addW2R(this->inImg1)
        .addW2R(this->inImg2)
        .addW2R(this->inImg3)
        .addW2R(this->inImg4)
        .addW2R(this->inImg5)
        .addR2W(this->outImgs.at(pass))
        .build();

    this->pipeline.bind(buf);
    this->nDescriptorSets.at(pass).at(fc % 2).bind(buf, this->pipeline);
    buf.dispatch(groupsX, groupsY, 1);
}

View file

@ -1,148 +0,0 @@
#include "shaderchains/zeta.hpp"
#include "utils/utils.hpp"
using namespace LSFG::Shaderchains;
///
/// Set up the Zeta shader chain.
///
/// Fetches the four "zeta/N.spv" shaders and pipelines, allocates the
/// intermediate and output images and writes every descriptor set.
///
/// @param device Vulkan device used for all allocations.
/// @param shaderpool Pool the shader modules and pipelines are fetched from.
/// @param pool Descriptor pool the descriptor sets are allocated from.
/// @param inImgs1 Three images sampled by the first stage.
/// @param inImg2 Image sampled by the last stage.
/// @param inImg3 Image sampled by the last stage.
/// @param genc Number of per-pass descriptor sets and uniform buffers to create.
///
Zeta::Zeta(const Core::Device& device, Pool::ShaderPool& shaderpool,
        const Core::DescriptorPool& pool,
        std::array<Core::Image, 3> inImgs1,
        Core::Image inImg2,
        Core::Image inImg3,
        size_t genc)
        : inImgs1(std::move(inImgs1)),
          inImg2(std::move(inImg2)),
          inImg3(std::move(inImg3)) {
    // shader modules together with their descriptor layouts
    this->shaderModules = {{
        shaderpool.getShader(device, "zeta/0.spv",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 3, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        shaderpool.getShader(device, "zeta/1.spv",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        shaderpool.getShader(device, "zeta/2.spv",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        shaderpool.getShader(device, "zeta/3.spv",
            { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
              { 2, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 6, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } })
    }};
    this->pipelines = {{
        shaderpool.getPipeline(device, "zeta/0.spv"),
        shaderpool.getPipeline(device, "zeta/1.spv"),
        shaderpool.getPipeline(device, "zeta/2.spv"),
        shaderpool.getPipeline(device, "zeta/3.spv")
    }};

    // one fixed descriptor set for each of the first three stages
    for (size_t stage = 0; stage < 3; ++stage) {
        this->descriptorSets.at(stage) = Core::DescriptorSet(device, pool,
            this->shaderModules.at(stage));
    }
    // the last stage gets one descriptor set per generated frame
    for (size_t gen = 0; gen < genc; ++gen) {
        this->nDescriptorSets.emplace_back(device, pool,
            this->shaderModules.at(3));
    }
    // per-frame uniform buffers, timestamps evenly spaced in (0, 1)
    for (size_t gen = 0; gen < genc; ++gen) {
        auto ubo = Globals::fgBuffer;
        ubo.timestamp = static_cast<float>(gen + 1) / static_cast<float>(genc + 1);
        this->buffers.emplace_back(device, ubo, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
    }

    // all images share the extent of the first input image
    const auto extent = this->inImgs1.at(0).getExtent();
    const auto makeImage = [&device, extent](VkFormat format) {
        return Core::Image(device,
            extent,
            format,
            VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
            VK_IMAGE_ASPECT_COLOR_BIT);
    };
    for (size_t idx = 0; idx < 4; ++idx) {
        this->tempImgs1.at(idx) = makeImage(VK_FORMAT_R8G8B8A8_UNORM);
        this->tempImgs2.at(idx) = makeImage(VK_FORMAT_R8G8B8A8_UNORM);
    }
    this->outImg = makeImage(VK_FORMAT_R16G16B16A16_SFLOAT);

    // stage 0: inImgs1 -> tempImgs1
    this->descriptorSets.at(0).update(device)
        .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs1)
        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
        .build();
    // stage 1: tempImgs1 -> tempImgs2
    this->descriptorSets.at(1).update(device)
        .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
        .build();
    // stage 2: tempImgs2 -> tempImgs1
    this->descriptorSets.at(2).update(device)
        .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
        .build();
    // stage 3: tempImgs1 + inImg2 + inImg3 -> outImg, once per generated frame
    for (size_t gen = 0; gen < genc; ++gen) {
        this->nDescriptorSets.at(gen).update(device)
            .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, this->buffers.at(gen))
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampBorder)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, Globals::samplerClampEdge)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg2)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg3)
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImg)
            .build();
    }
}
///
/// Record the four Zeta stages into a command buffer.
///
/// @param buf Command buffer to record into.
/// @param pass Index of the per-pass descriptor set used by the last stage.
///
void Zeta::Dispatch(const Core::CommandBuffer& buf, uint64_t pass) {
    // every stage uses ceil(extent / 8) workgroups per axis
    const auto size = this->tempImgs1.at(0).getExtent();
    const uint32_t groupsX = (size.width + 7) >> 3;
    const uint32_t groupsY = (size.height + 7) >> 3;

    // bind pipeline `stage`, bind descriptor set `sets[slot]`, then dispatch
    const auto record = [&](size_t stage, auto& sets, uint64_t slot) {
        this->pipelines.at(stage).bind(buf);
        sets.at(slot).bind(buf, this->pipelines.at(stage));
        buf.dispatch(groupsX, groupsY, 1);
    };

    // stage 0: inImgs1 -> tempImgs1
    Utils::BarrierBuilder(buf)
        .addW2R(this->inImgs1)
        .addR2W(this->tempImgs1)
        .build();
    record(0, this->descriptorSets, 0);

    // stage 1: tempImgs1 -> tempImgs2
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs1)
        .addR2W(this->tempImgs2)
        .build();
    record(1, this->descriptorSets, 1);

    // stage 2: tempImgs2 -> tempImgs1
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs2)
        .addR2W(this->tempImgs1)
        .build();
    record(2, this->descriptorSets, 2);

    // stage 3: tempImgs1 + inImg2 + inImg3 -> outImg
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs1)
        .addW2R(this->inImg2)
        .addW2R(this->inImg3)
        .addR2W(this->outImg)
        .build();
    record(3, this->nDescriptorSets, pass);
}

View file

@ -0,0 +1,25 @@
# clang-tidy check selection.
# Each entry is a glob; an entry starting with "-" removes the matching checks
# from the groups enabled before it.
Checks:
# enable basic checks
- "clang-analyzer-*"
# configure performance checks
- "performance-*"
- "-performance-enum-size"
# configure readability and bugprone checks
- "readability-*"
- "bugprone-*"
- "misc-*"
- "-readability-braces-around-statements"
- "-readability-function-cognitive-complexity"
- "-readability-identifier-length"
- "-readability-implicit-bool-conversion"
- "-readability-magic-numbers"
- "-readability-math-missing-parentheses"
- "-bugprone-easily-swappable-parameters"
# configure modernization
- "modernize-*"
- "-modernize-use-trailing-return-type"
# configure cppcoreguidelines
- "cppcoreguidelines-*"
- "-cppcoreguidelines-avoid-magic-numbers"
- "-cppcoreguidelines-pro-type-reinterpret-cast" # allows reinterpret_cast
- "-cppcoreguidelines-avoid-non-const-global-variables"

3
lsfg-vk-v3.1/.gitattributes vendored Normal file
View file

@ -0,0 +1,3 @@
*.cpp diff=cpp eol=lf
*.hpp diff=cpp eol=lf
*.md diff=markdown eol=lf

9
lsfg-vk-v3.1/.gitignore vendored Normal file
View file

@ -0,0 +1,9 @@
# cmake files
/build
# ide/lsp files
/.zed
/.vscode
/.clangd
/.cache
/.ccls

View file

@ -0,0 +1,57 @@
cmake_minimum_required(VERSION 3.29)

# project
project(lsfg-vk-v3.1
    DESCRIPTION "Lossless Scaling Frame Generation v3.1"
    LANGUAGES CXX)

# collect every translation unit of the library
file(GLOB SOURCES
    "src/core/*.cpp"
    "src/pool/*.cpp"
    "src/shaders/*.cpp"
    "src/utils/*.cpp"
    "src/*.cpp"
)

add_library(lsfg-vk-v3.1 STATIC ${SOURCES})

# target
set_target_properties(lsfg-vk-v3.1 PROPERTIES
    CXX_STANDARD 20
    CXX_STANDARD_REQUIRED ON)
# "include" holds private headers, "public" is exported to consumers
target_include_directories(lsfg-vk-v3.1
    PRIVATE include
    PUBLIC public)
target_link_libraries(lsfg-vk-v3.1
    PRIVATE vulkan)
target_compile_options(lsfg-vk-v3.1 PRIVATE
    -fPIC)

# diagnostics
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
    set_target_properties(lsfg-vk-v3.1 PROPERTIES
        EXPORT_COMPILE_COMMANDS ON)
endif()

if(LSFGVK_EXCESS_DEBUG STREQUAL "ON")
    target_compile_options(lsfg-vk-v3.1 PRIVATE
        -Weverything
        # disable compat c++ flags
        -Wno-pre-c++20-compat-pedantic
        -Wno-pre-c++17-compat
        -Wno-c++98-compat-pedantic
        -Wno-c++98-compat
        # disable other flags
        -Wno-missing-designated-field-initializers
        -Wno-shadow # allow shadowing
        -Wno-switch-enum # ignore missing cases
        -Wno-switch-default # ignore missing default
        -Wno-padded # ignore automatic padding
        -Wno-exit-time-destructors # allow globals
        -Wno-global-constructors # allow globals
        -Wno-cast-function-type-strict # for vulkan
    )

    # The target property is CXX_CLANG_TIDY; CMAKE_CXX_CLANG_TIDY is only the
    # variable that initializes it at target creation, so setting it as a
    # property on an existing target has no effect.
    set_target_properties(lsfg-vk-v3.1 PROPERTIES
        CXX_CLANG_TIDY clang-tidy)
endif()

21
lsfg-vk-v3.1/LICENSE.md Normal file
View file

@ -0,0 +1,21 @@
## MIT License
Copyright (c) 2025 lsfg-vk
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

14
lsfg-vk-v3.1/README.md Normal file
View file

@ -0,0 +1,14 @@
## lsfg-vk-v3.1
Version 3.1 of Lossless Scaling Frame Generation
This is a subproject of lsfg-vk and contains the external Vulkan logic for generating frames.
The project is intentionally structured as a fully external project, such that it can be integrated into other applications.
### Interface
Interfacing with lsfg-vk-v3.1 is done via the `lsfg.hpp` header. The internal Vulkan instance is created using `LSFG::initialize()` and requires a specific deviceUUID, as well as parts of the lsfg-vk configuration, including a function that loads SPIR-V shaders by name. Cleanup is done via `LSFG::finalize()`, after which `LSFG::initialize()` may be called again. Please note that the initialization process is expensive and may take a while. It is recommended to call this function once during the application's lifetime.
Once the format and extent of the requested images are determined, `LSFG::createContext()` should be called to initialize a frame generation context. The Vulkan images are created from backing memory, which is passed through the file descriptor arguments. A context can be destroyed using `LSFG::deleteContext()`.
Presenting the context can be done via `LSFG::presentContext()`. Before calling the function a second time, make sure the outgoing semaphores have been signaled.

View file

@ -0,0 +1,75 @@
#ifndef CONTEXT_HPP
#define CONTEXT_HPP
#include "shaders/alpha.hpp"
#include "shaders/beta.hpp"
#include "shaders/delta.hpp"
#include "shaders/gamma.hpp"
#include "shaders/generate.hpp"
#include "shaders/mipmaps.hpp"
namespace LSFG {
///
/// A frame generation context: owns the two input images, the shader chains
/// operating on them and the per-present synchronization state.
///
class Context {
public:
///
/// Create a context.
///
/// @param vk Shared Vulkan state (device, pools and caches) used to create all resources.
/// @param in0 File descriptor for the first input image.
/// @param in1 File descriptor for the second input image.
/// @param outN File descriptors for the output images. Their count also sizes the per-present render data.
/// @param extent The size of the images.
/// @param format The format of the images.
///
/// @throws LSFG::vulkan_error if the context fails to initialize.
///
Context(Vulkan& vk,
int in0, int in1, const std::vector<int>& outN,
VkExtent2D extent, VkFormat format);
///
/// Present on the context.
///
/// @param vk Shared Vulkan state used to create and submit the command buffers.
/// @param inSem Semaphore to wait on before starting the generation.
/// @param outSem Semaphores to signal after each generation is done.
///
/// @throws LSFG::vulkan_error if the context fails to present.
///
void present(Vulkan& vk,
int inSem, const std::vector<int>& outSem);
// Copy, move and destruction are all defaulted (member-wise).
Context(const Context&) = default;
Context& operator=(const Context&) = default;
Context(Context&&) = default;
Context& operator=(Context&&) = default;
~Context() = default;
private:
Core::Image inImg_0, inImg_1; // inImg_0 is next when fc % 2 == 0
// Incremented at the end of every successful present(); frameIdx % 8 selects the RenderData slot.
uint64_t frameIdx{0};
// Synchronization objects and command buffers belonging to a single present() call.
struct RenderData {
Core::Semaphore inSemaphore; // signaled when input is ready
std::vector<Core::Semaphore> internalSemaphores; // signaled when first step is done
std::vector<Core::Semaphore> outSemaphores; // signaled when each pass is done
std::vector<Core::Fence> completionFences; // fence for completion of each pass
Core::CommandBuffer cmdBuffer1; // command buffer for the first step (mipmaps, alpha, beta)
std::vector<Core::CommandBuffer> cmdBuffers2; // command buffers for second step
bool shouldWait{false}; // whether present() waits on completionFences before reusing this slot
};
// Ring of render data slots, reused every 8 presents.
std::array<RenderData, 8> data;
// Shader chains, listed in the order the constructor wires them together.
Shaders::Mipmaps mipmaps;
std::array<Shaders::Alpha, 7> alpha; // one per mipmap output image
Shaders::Beta beta;
std::array<Shaders::Gamma, 7> gamma;
std::array<Shaders::Delta, 3> delta; // only built for the last three gamma steps
Shaders::Generate generate;
};
}
#endif // CONTEXT_HPP

View file

@ -21,10 +21,11 @@ namespace LSFG::Core {
/// Create the device.
///
/// @param instance Vulkan instance
/// @param deviceUUID The UUID of the Vulkan device to use.
///
/// @throws LSFG::vulkan_error if object creation fails.
///
Device(const Instance& instance);
Device(const Instance& instance, uint64_t deviceUUID);
/// Get the Vulkan handle.
[[nodiscard]] auto handle() const { return *this->device; }

View file

@ -29,8 +29,10 @@ namespace LSFG::Core {
///
/// @throws LSFG::vulkan_error if object creation fails.
///
Image(const Core::Device& device, VkExtent2D extent, VkFormat format,
VkImageUsageFlags usage, VkImageAspectFlags aspectFlags);
Image(const Core::Device& device, VkExtent2D extent,
VkFormat format = VK_FORMAT_R8G8B8A8_UNORM,
VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
VkImageAspectFlags aspectFlags = VK_IMAGE_ASPECT_COLOR_BIT);
///
/// Create the image with shared backing memory.

View file

@ -23,10 +23,15 @@ namespace LSFG::Core {
///
/// @param device Vulkan device
/// @param mode Address mode for the sampler.
/// @param compare Compare operation for the sampler.
/// @param isWhite Whether the border color is white.
///
/// @throws LSFG::vulkan_error if object creation fails.
///
Sampler(const Core::Device& device, VkSamplerAddressMode mode);
Sampler(const Core::Device& device,
VkSamplerAddressMode mode,
VkCompareOp compare,
bool isWhite);
/// Get the Vulkan handle.
[[nodiscard]] auto handle() const { return *this->sampler; }

View file

@ -0,0 +1,70 @@
#ifndef RESOURCEPOOL_HPP
#define RESOURCEPOOL_HPP
#include "core/device.hpp"
#include "core/buffer.hpp"
#include "core/sampler.hpp"
#include "vulkan/vulkan_core.h"
#include <unordered_map>
namespace LSFG::Pool {
///
/// Resource pool for each Vulkan device.
///
class ResourcePool {
public:
ResourcePool() noexcept = default;
///
/// Create the resource pool.
///
/// Only stores the two settings; buffers and samplers are created lazily
/// by getBuffer() and getSampler().
///
/// @param isHdr HDR support stored in buffers.
/// @param flowScale Scale factor stored in buffers.
///
ResourcePool(bool isHdr, float flowScale)
: isHdr(isHdr), flowScale(flowScale) {}
///
/// Retrieve a buffer with given parameters or create it.
///
/// Buffers are cached; the cache key is built from timestamp, firstIter
/// and firstIterS.
///
/// @param device Vulkan device used when a new buffer has to be created
/// @param timestamp Timestamp stored in buffer
/// @param firstIter First iteration flag (intended to be stored in buffer)
/// @param firstIterS First special iteration flag (intended to be stored in buffer)
/// @return Created or cached buffer
///
/// @throws LSFG::vulkan_error if the buffer cannot be created.
///
Core::Buffer getBuffer(
const Core::Device& device,
float timestamp = 0.0F, bool firstIter = false, bool firstIterS = false);
///
/// Retrieve a sampler by type or create it.
///
/// Samplers are cached; the cache key is built from type, compare and isWhite.
///
/// @param device Vulkan device used when a new sampler has to be created
/// @param type Type of the sampler
/// @param compare Compare operation for the sampler
/// @param isWhite Whether the sampler is white
/// @return Created or cached sampler
///
/// @throws LSFG::vulkan_error if the sampler cannot be created.
///
Core::Sampler getSampler(
const Core::Device& device,
VkSamplerAddressMode type = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER,
VkCompareOp compare = VK_COMPARE_OP_NEVER,
bool isWhite = false);
private:
// Caches keyed by the packed parameter hash computed in getBuffer()/getSampler().
std::unordered_map<uint64_t, Core::Buffer> buffers;
std::unordered_map<uint64_t, Core::Sampler> samplers;
// Settings copied into every constant buffer created by getBuffer().
bool isHdr{};
float flowScale{};
};
}
#endif // RESOURCEPOOL_HPP

View file

@ -4,10 +4,11 @@
#include "core/device.hpp"
#include "core/pipeline.hpp"
#include "core/shadermodule.hpp"
#include "pool/extract.hpp"
#include <functional>
#include <string>
#include <unordered_map>
#include <vector>
namespace LSFG::Pool {
@ -21,19 +22,19 @@ namespace LSFG::Pool {
///
/// Create the shader pool.
///
/// @param path Path to the shader dll
/// @param source Function to retrieve shader source code by name.
///
/// @throws std::runtime_error if the shader pool cannot be created.
///
ShaderPool(const std::string& path) : extractor(path) {}
ShaderPool(const std::function<std::vector<uint8_t>(const std::string&)>& source)
: source(source) {}
///
/// Retrieve a shader module by name or create it.
///
/// @param device Vulkan device
/// @param name Name of the shader module
/// @param types Descriptor types for the shader module
/// @return Shader module or empty
/// @return Shader module
///
/// @throws LSFG::vulkan_error if the shader module cannot be created.
///
@ -44,7 +45,6 @@ namespace LSFG::Pool {
///
/// Retrieve a pipeline shader module by name or create it.
///
/// @param device Vulkan device
/// @param name Name of the shader module
/// @return Pipeline shader module or empty
///
@ -53,7 +53,7 @@ namespace LSFG::Pool {
Core::Pipeline getPipeline(
const Core::Device& device, const std::string& name);
private:
Extractor extractor;
std::function<std::vector<uint8_t>(const std::string&)> source;
std::unordered_map<std::string, Core::ShaderModule> shaders;
std::unordered_map<std::string, Core::Pipeline> pipelines;
};

View file

@ -0,0 +1,61 @@
#ifndef ALPHA_HPP
#define ALPHA_HPP
#include "core/commandbuffer.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/shadermodule.hpp"
#include "utils/utils.hpp"
#include <array>
namespace LSFG::Shaders {
///
/// Alpha shader.
///
class Alpha {
public:
Alpha() = default;
///
/// Initialize the shaderchain.
///
/// @param vk Shared Vulkan state providing the device, descriptor pool, shader pool and resource pool.
/// @param inImg One mipmap level
///
/// @throws LSFG::vulkan_error if resource creation fails.
///
Alpha(Vulkan& vk, Core::Image inImg);
///
/// Dispatch the shaderchain.
///
/// @param buf Command buffer the dispatch is recorded into.
/// @param frameCount Index of the current frame.
///
void Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount);
/// Get the output images
[[nodiscard]] const auto& getOutImages() const { return this->outImgs; }
/// Copy, move and destruction are all defaulted (member-wise)
Alpha(const Alpha&) noexcept = default;
Alpha& operator=(const Alpha&) noexcept = default;
Alpha(Alpha&&) noexcept = default;
Alpha& operator=(Alpha&&) noexcept = default;
~Alpha() = default;
private:
// Shader modules and pipelines for "alpha[0]" .. "alpha[3]".
std::array<Core::ShaderModule, 4> shaderModules;
std::array<Core::Pipeline, 4> pipelines;
Core::Sampler sampler;
// Descriptor sets for shader modules 0..2 (one each).
std::array<Core::DescriptorSet, 3> descriptorSets;
// Three descriptor sets that all use shader module 3.
std::array<Core::DescriptorSet, 3> lastDescriptorSet;
Core::Image inImg;
// Intermediate images; tempImgs1 and tempImgs2 are created at half the input extent.
std::array<Core::Image, 2> tempImgs1;
std::array<Core::Image, 2> tempImgs2;
std::array<Core::Image, 4> tempImgs3;
// Three sets of four output images.
std::array<std::array<Core::Image, 4>, 3> outImgs;
};
}
#endif // ALPHA_HPP

View file

@ -0,0 +1,61 @@
#ifndef BETA_HPP
#define BETA_HPP
#include "core/commandbuffer.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/shadermodule.hpp"
#include "utils/utils.hpp"
#include <array>
namespace LSFG::Shaders {
///
/// Beta shader.
///
class Beta {
public:
Beta() = default;
///
/// Initialize the shaderchain.
///
/// @param vk Shared Vulkan state used to create the resources.
/// @param inImgs Three sets of four RGBA images, corresponding to a frame count % 3.
///
/// @throws LSFG::vulkan_error if resource creation fails.
///
Beta(Vulkan& vk, std::array<std::array<Core::Image, 4>, 3> inImgs);
///
/// Dispatch the shaderchain.
///
/// @param buf Command buffer the dispatch is recorded into.
/// @param frameCount Index of the current frame.
///
void Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount);
/// Get the output images
[[nodiscard]] const auto& getOutImages() const { return this->outImgs; }
/// Copy, move and destruction are all defaulted (member-wise)
Beta(const Beta&) noexcept = default;
Beta& operator=(const Beta&) noexcept = default;
Beta(Beta&&) noexcept = default;
Beta& operator=(Beta&&) noexcept = default;
~Beta() = default;
private:
std::array<Core::ShaderModule, 5> shaderModules;
std::array<Core::Pipeline, 5> pipelines;
std::array<Core::Sampler, 2> samplers;
Core::Buffer buffer;
// Three descriptor sets for the first shader; presumably one per input image set (frame count % 3) - confirm in beta.cpp.
std::array<Core::DescriptorSet, 3> firstDescriptorSet;
std::array<Core::DescriptorSet, 4> descriptorSets;
std::array<std::array<Core::Image, 4>, 3> inImgs;
// Intermediate images used between the shaders of this chain.
std::array<Core::Image, 2> tempImgs1;
std::array<Core::Image, 2> tempImgs2;
// Six output images, returned by getOutImages().
std::array<Core::Image, 6> outImgs;
};
}
#endif // BETA_HPP

View file

@ -0,0 +1,78 @@
#ifndef DELTA_HPP
#define DELTA_HPP
#include "core/commandbuffer.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/shadermodule.hpp"
#include "utils/utils.hpp"
#include <array>
#include <optional>
namespace LSFG::Shaders {
///
/// Delta shader.
///
class Delta {
public:
Delta() = default;
///
/// Initialize the shaderchain.
///
/// @param vk Shared Vulkan state used to create the resources.
/// @param inImgs1 Three sets of four RGBA images, corresponding to a frame count % 3.
/// @param inImg2 Second Input image
/// @param optImg1 Optional image for non-first passes.
/// @param optImg2 Second optional image for non-first passes.
/// @param optImg3 Third optional image for non-first passes.
///
/// @throws LSFG::vulkan_error if resource creation fails.
///
Delta(Vulkan& vk, std::array<std::array<Core::Image, 4>, 3> inImgs1,
Core::Image inImg2,
std::optional<Core::Image> optImg1,
std::optional<Core::Image> optImg2,
std::optional<Core::Image> optImg3);
///
/// Dispatch the shaderchain.
///
/// @param buf Command buffer the dispatch is recorded into.
/// @param frameCount Index of the current frame.
/// @param pass_idx Index of the generation pass being recorded.
///
void Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount, uint64_t pass_idx);
/// Get the first output image
[[nodiscard]] const auto& getOutImage1() const { return this->outImg1; }
/// Get the second output image
[[nodiscard]] const auto& getOutImage2() const { return this->outImg2; }
/// Copy, move and destruction are all defaulted (member-wise)
Delta(const Delta&) noexcept = default;
Delta& operator=(const Delta&) noexcept = default;
Delta(Delta&&) noexcept = default;
Delta& operator=(Delta&&) noexcept = default;
~Delta() = default;
private:
std::array<Core::ShaderModule, 10> shaderModules;
std::array<Core::Pipeline, 10> pipelines;
std::array<Core::Sampler, 3> samplers;
// Constant buffer and descriptor sets belonging to one pass.
struct DeltaPass {
Core::Buffer buffer;
std::array<Core::DescriptorSet, 3> firstDescriptorSet;
std::array<Core::DescriptorSet, 8> descriptorSets;
std::array<Core::DescriptorSet, 3> sixthDescriptorSet;
};
// Presumably indexed by pass_idx in Dispatch() - confirm in delta.cpp.
std::vector<DeltaPass> passes;
std::array<std::array<Core::Image, 4>, 3> inImgs1;
Core::Image inImg2;
std::optional<Core::Image> optImg1, optImg2, optImg3;
// Intermediate images used between the shaders of this chain.
std::array<Core::Image, 4> tempImgs1;
std::array<Core::Image, 4> tempImgs2;
Core::Image outImg1, outImg2;
};
}
#endif // DELTA_HPP

View file

@ -0,0 +1,70 @@
#ifndef GAMMA_HPP
#define GAMMA_HPP
#include "core/commandbuffer.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/shadermodule.hpp"
#include "utils/utils.hpp"
#include <array>
#include <optional>
namespace LSFG::Shaders {
///
/// Gamma shader.
///
class Gamma {
public:
Gamma() = default;
///
/// Initialize the shaderchain.
///
/// @param vk Shared Vulkan state used to create the resources.
/// @param inImgs1 Three sets of four RGBA images, corresponding to a frame count % 3.
/// @param inImg2 Second Input image
/// @param optImg Optional image for non-first passes.
///
/// @throws LSFG::vulkan_error if resource creation fails.
///
Gamma(Vulkan& vk, std::array<std::array<Core::Image, 4>, 3> inImgs1,
Core::Image inImg2, std::optional<Core::Image> optImg);
///
/// Dispatch the shaderchain.
///
/// @param buf Command buffer the dispatch is recorded into.
/// @param frameCount Index of the current frame.
/// @param pass_idx Index of the generation pass being recorded.
///
void Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount, uint64_t pass_idx);
/// Get the output image
[[nodiscard]] const auto& getOutImage() const { return this->outImg; }
/// Copy, move and destruction are all defaulted (member-wise)
Gamma(const Gamma&) noexcept = default;
Gamma& operator=(const Gamma&) noexcept = default;
Gamma(Gamma&&) noexcept = default;
Gamma& operator=(Gamma&&) noexcept = default;
~Gamma() = default;
private:
std::array<Core::ShaderModule, 5> shaderModules;
std::array<Core::Pipeline, 5> pipelines;
std::array<Core::Sampler, 3> samplers;
// Constant buffer and descriptor sets belonging to one pass.
struct GammaPass {
Core::Buffer buffer;
std::array<Core::DescriptorSet, 3> firstDescriptorSet;
std::array<Core::DescriptorSet, 4> descriptorSets;
};
// Presumably indexed by pass_idx in Dispatch() - confirm in gamma.cpp.
std::vector<GammaPass> passes;
std::array<std::array<Core::Image, 4>, 3> inImgs1;
Core::Image inImg2;
std::optional<Core::Image> optImg;
// Intermediate images used between the shaders of this chain.
std::array<Core::Image, 4> tempImgs1;
std::array<Core::Image, 4> tempImgs2;
Core::Image outImg;
};
}
#endif // GAMMA_HPP

View file

@ -0,0 +1,67 @@
#ifndef GENERATE_HPP
#define GENERATE_HPP
#include "core/commandbuffer.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/shadermodule.hpp"
#include "utils/utils.hpp"
#include <array>
namespace LSFG::Shaders {
///
/// Generate shader.
///
class Generate {
public:
Generate() = default;
///
/// Initialize the shaderchain.
///
/// @param vk Shared Vulkan state used to create the resources.
/// @param inImg1 Input image 1.
/// @param inImg2 Input image 2.
/// @param inImg3 Input image 3.
/// @param inImg4 Input image 4.
/// @param inImg5 Input image 5.
/// @param fds File descriptors for the output images.
///
/// @throws LSFG::vulkan_error if resource creation fails.
///
Generate(Vulkan& vk,
Core::Image inImg1, Core::Image inImg2,
Core::Image inImg3, Core::Image inImg4, Core::Image inImg5,
const std::vector<int>& fds);
///
/// Dispatch the shaderchain.
///
/// @param buf Command buffer the dispatch is recorded into.
/// @param frameCount Index of the current frame.
/// @param pass_idx Index of the generation pass being recorded.
///
void Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount, uint64_t pass_idx);
/// Copy, move and destruction are all defaulted (member-wise)
Generate(const Generate&) noexcept = default;
Generate& operator=(const Generate&) noexcept = default;
Generate(Generate&&) noexcept = default;
Generate& operator=(Generate&&) noexcept = default;
~Generate() = default;
private:
Core::ShaderModule shaderModule;
Core::Pipeline pipeline;
std::array<Core::Sampler, 2> samplers;
// Constant buffer and descriptor sets belonging to one pass.
struct GeneratePass {
Core::Buffer buffer;
std::array<Core::DescriptorSet, 2> descriptorSet;
};
// Presumably indexed by pass_idx in Dispatch() - confirm in generate.cpp.
std::vector<GeneratePass> passes;
Core::Image inImg1, inImg2;
Core::Image inImg3, inImg4, inImg5;
// Output images; presumably one per entry of the fds constructor argument - confirm in generate.cpp.
std::vector<Core::Image> outImgs;
};
}
#endif // GENERATE_HPP

View file

@ -0,0 +1,60 @@
#ifndef MIPMAPS_HPP
#define MIPMAPS_HPP
#include "core/buffer.hpp"
#include "core/commandbuffer.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/shadermodule.hpp"
#include "utils/utils.hpp"
#include <array>
namespace LSFG::Shaders {
///
/// Mipmaps shader.
///
class Mipmaps {
public:
Mipmaps() = default;
///
/// Initialize the shaderchain.
///
/// @param vk Shared Vulkan state used to create the resources.
/// @param inImg_0 The next frame (when fc % 2 == 0)
/// @param inImg_1 The next frame (when fc % 2 == 1)
///
/// @throws LSFG::vulkan_error if resource creation fails.
///
Mipmaps(Vulkan& vk, Core::Image inImg_0, Core::Image inImg_1);
///
/// Dispatch the shaderchain.
///
/// @param buf Command buffer the dispatch is recorded into.
/// @param frameCount Index of the current frame.
///
void Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount);
/// Get the output images.
[[nodiscard]] const auto& getOutImages() const { return this->outImgs; }
/// Copy, move and destruction are all defaulted (member-wise)
Mipmaps(const Mipmaps&) noexcept = default;
Mipmaps& operator=(const Mipmaps&) noexcept = default;
Mipmaps(Mipmaps&&) noexcept = default;
Mipmaps& operator=(Mipmaps&&) noexcept = default;
~Mipmaps() = default;
private:
Core::ShaderModule shaderModule;
Core::Pipeline pipeline;
Core::Buffer buffer;
Core::Sampler sampler;
// Two descriptor sets; presumably one per input image (fc % 2) - confirm in mipmaps.cpp.
std::array<Core::DescriptorSet, 2> descriptorSets;
Core::Image inImg_0, inImg_1;
// Seven output images, returned by getOutImages().
std::array<Core::Image, 7> outImgs;
};
}
#endif // MIPMAPS_HPP

View file

@ -0,0 +1,19 @@
#ifndef TRANS_HPP
#define TRANS_HPP
#include <cstdint>
#include <vector>
namespace LSFG::Utils::Trans {
///
/// Translate shader bytecode to SPIR-V.
///
/// The bytecode is taken by value, so the caller's vector is left untouched.
///
/// @param bytecode The shader bytecode to translate.
/// @return A vector containing the translated SPIR-V bytecode.
///
[[nodiscard]] std::vector<uint8_t> translateShader(std::vector<uint8_t> bytecode);
}
#endif // TRANS_HPP

View file

@ -2,9 +2,11 @@
#define UTILS_HPP
#include "core/commandbuffer.hpp"
#include "core/descriptorpool.hpp"
#include "core/image.hpp"
#include "core/sampler.hpp"
#include "core/device.hpp"
#include "pool/resourcepool.hpp"
#include "pool/shaderpool.hpp"
#include <string>
#include <array>
@ -84,37 +86,19 @@ namespace LSFG::Utils {
}
namespace LSFG::Globals {
namespace LSFG {
struct Vulkan {
Core::Device device;
Core::CommandPool commandPool;
Core::DescriptorPool descriptorPool;
/// Global sampler with address mode set to clamp to border.
extern Core::Sampler samplerClampBorder;
/// Global sampler with address mode set to clamp to edge.
extern Core::Sampler samplerClampEdge;
uint64_t generationCount;
float flowScale;
bool isHdr;
/// Commonly used constant buffer structure for shaders.
struct FgBuffer {
std::array<uint32_t, 2> inputOffset;
uint32_t firstIter;
uint32_t firstIterS;
uint32_t advancedColorKind;
uint32_t hdrSupport;
float resolutionInvScale;
float timestamp;
float uiThreshold;
std::array<uint32_t, 3> pad;
Pool::ShaderPool shaders;
Pool::ResourcePool resources;
};
/// Default instance of the FgBuffer.
extern FgBuffer fgBuffer;
static_assert(sizeof(FgBuffer) == 48, "FgBuffer must be 48 bytes in size.");
/// Initialize global resources.
void initializeGlobals(const Core::Device& device);
/// Uninitialize global resources.
void uninitializeGlobals() noexcept;
}
#endif // UTILS_HPP

View file

@ -1,34 +1,45 @@
#ifndef PUBLIC_LSFG_HPP
#define PUBLIC_LSFG_HPP
#ifndef LSFG_3_1_HPP
#define LSFG_3_1_HPP
#include <stdexcept>
#include <vector>
#include <vulkan/vulkan_core.h>
#include <functional>
#include <stdexcept>
#include <vector>
namespace LSFG {
///
/// Initialize the LSFG library.
///
/// @param deviceUUID The UUID of the Vulkan device to use.
/// @param isHdr Whether the images are in HDR format.
/// @param flowScale Internal flow scale factor.
/// @param generationCount Number of frames to generate.
/// @param loader Function to load shader source code by name.
///
/// @throws LSFG::vulkan_error if Vulkan objects fail to initialize.
///
void initialize();
void initialize(uint64_t deviceUUID,
bool isHdr, float flowScale, uint64_t generationCount,
const std::function<std::vector<uint8_t>(const std::string&)>& loader
);
///
/// Create a new LSFG context on a swapchain.
///
/// @param width Width of the input images.
/// @param height Height of the input images.
/// @param in0 File descriptor for the first input image.
/// @param in1 File descriptor for the second input image.
/// @param outN File descriptor for each output image. This defines the LSFG level.
/// @param extent The size of the images
/// @param format The format of the images.
/// @return A unique identifier for the created context.
///
/// @throws LSFG::vulkan_error if the context cannot be created.
///
int32_t createContext(uint32_t width, uint32_t height, int in0, int in1,
const std::vector<int>& outN);
int32_t createContext(
int in0, int in1, const std::vector<int>& outN,
VkExtent2D extent, VkFormat format);
///
/// Present a context.
@ -79,4 +90,4 @@ namespace LSFG {
}
#endif // PUBLIC_LSFG_HPP
#endif // LSFG_3_1_HPP

View file

@ -0,0 +1,110 @@
#include "context.hpp"
#include "lsfg.hpp"
#include "utils/utils.hpp"
#include <vulkan/vulkan_core.h>
#include <vector>
#include <cstddef>
#include <algorithm>
#include <optional>
#include <cstdint>
using namespace LSFG;
///
/// Build a frame generation context.
///
/// Imports the two input images, sizes the per-present render data to the
/// number of outputs and wires the shader chains together:
/// mipmaps -> alpha (x7) -> beta -> gamma (x7) / delta (x3) -> generate.
///
/// @throws LSFG::vulkan_error if any Vulkan resource cannot be created.
///
Context::Context(Vulkan& vk,
        int in0, int in1, const std::vector<int>& outN,
        VkExtent2D extent, VkFormat format) {
    // import input images (both use the same usage and aspect)
    const VkImageUsageFlags inputUsage =
        VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT;
    this->inImg_0 = Core::Image(vk.device, extent, format,
        inputUsage, VK_IMAGE_ASPECT_COLOR_BIT, in0);
    this->inImg_1 = Core::Image(vk.device, extent, format,
        inputUsage, VK_IMAGE_ASPECT_COLOR_BIT, in1);

    // prepare render data: every slot holds one entry per output image
    const size_t outputCount = outN.size();
    for (auto& slot : this->data) {
        slot.internalSemaphores.resize(outputCount);
        slot.outSemaphores.resize(outputCount);
        slot.completionFences.resize(outputCount);
        slot.cmdBuffers2.resize(outputCount);
    }

    // create shader chains
    this->mipmaps = Shaders::Mipmaps(vk, this->inImg_0, this->inImg_1);
    const auto& mipLevels = this->mipmaps.getOutImages();
    for (size_t level = 0; level < 7; level++)
        this->alpha.at(level) = Shaders::Alpha(vk, mipLevels.at(level));

    this->beta = Shaders::Beta(vk, this->alpha.at(0).getOutImages());
    const auto& betaOut = this->beta.getOutImages();

    for (size_t step = 0; step < 7; step++) {
        // the chain walks the alpha outputs from index 6 down to index 0
        const size_t level = 6 - step;

        // every gamma step except the first consumes the previous gamma output
        std::optional<Core::Image> prevGamma;
        if (step != 0)
            prevGamma = this->gamma.at(step - 1).getOutImage();
        this->gamma.at(step) = Shaders::Gamma(vk,
            this->alpha.at(level).getOutImages(),
            betaOut.at(std::min<size_t>(level, 5)),
            prevGamma);

        // delta only exists for the last three steps
        if (step < 4)
            continue;

        // the first delta step receives no optional inputs at all
        std::optional<Core::Image> gammaIn;
        std::optional<Core::Image> prevDelta1;
        std::optional<Core::Image> prevDelta2;
        if (step != 4) {
            gammaIn = this->gamma.at(step - 1).getOutImage();
            prevDelta1 = this->delta.at(step - 5).getOutImage1();
            prevDelta2 = this->delta.at(step - 5).getOutImage2();
        }
        this->delta.at(step - 4) = Shaders::Delta(vk,
            this->alpha.at(level).getOutImages(),
            betaOut.at(level),
            gammaIn, prevDelta1, prevDelta2);
    }

    this->generate = Shaders::Generate(vk,
        this->inImg_0, this->inImg_1,
        this->gamma.at(6).getOutImage(),
        this->delta.at(2).getOutImage1(),
        this->delta.at(2).getOutImage2(),
        outN);
}
///
/// Record and submit the work for one present.
///
/// The work is split into two stages:
///  1. one command buffer that runs mipmaps, alpha and beta once, and
///  2. one command buffer per generated frame ("pass") that runs the whole
///     gamma/delta chain followed by generate for that pass.
///
/// @param vk Shared Vulkan state used to create and submit the command buffers.
/// @param inSem Handle the wait semaphore of the first stage is imported from.
/// @param outSem Handles the per-pass signal semaphores are imported from;
///               must contain exactly one entry per output image of this context.
///
/// @throws LSFG::vulkan_error if the number of semaphores does not match, a
///         fence wait times out or any Vulkan object cannot be created.
///
void Context::present(Vulkan& vk,
        int inSem, const std::vector<int>& outSem) {
    auto& slot = this->data.at(this->frameIdx % this->data.size());

    // the constructor sized every per-pass vector to the number of output images
    const size_t passCount = slot.cmdBuffers2.size();
    if (outSem.size() != passCount)
        throw LSFG::vulkan_error(VK_ERROR_UNKNOWN,
            "Number of output semaphores does not match number of output images");

    // 0. wait for completion of the previous frame that used this slot
    if (slot.shouldWait)
        for (auto& fence : slot.completionFences)
            if (!fence.wait(vk.device, UINT64_MAX))
                throw LSFG::vulkan_error(VK_TIMEOUT, "Fence wait timed out");
    // only re-armed once every pass of this present has been submitted
    slot.shouldWait = false;

    // 1. create mipmaps and process input image
    slot.inSemaphore = Core::Semaphore(vk.device, inSem);
    // internal semaphores are created fresh: each outSem handle is imported
    // exactly once (as the pass's out semaphore below)
    for (size_t pass = 0; pass < passCount; pass++)
        slot.internalSemaphores.at(pass) = Core::Semaphore(vk.device);

    slot.cmdBuffer1 = Core::CommandBuffer(vk.device, vk.commandPool);
    slot.cmdBuffer1.begin();

    this->mipmaps.Dispatch(slot.cmdBuffer1, this->frameIdx);
    for (size_t i = 0; i < this->alpha.size(); i++)
        this->alpha.at(this->alpha.size() - 1 - i).Dispatch(slot.cmdBuffer1, this->frameIdx);
    this->beta.Dispatch(slot.cmdBuffer1, this->frameIdx);

    slot.cmdBuffer1.end();
    slot.cmdBuffer1.submit(vk.device.getComputeQueue(), std::nullopt,
        { slot.inSemaphore }, std::nullopt,
        slot.internalSemaphores, std::nullopt);

    // 2. generate intermediary frames, one command buffer per pass
    for (size_t pass = 0; pass < passCount; pass++) {
        slot.outSemaphores.at(pass) = Core::Semaphore(vk.device, outSem.at(pass));
        slot.completionFences.at(pass) = Core::Fence(vk.device);

        auto& buf = slot.cmdBuffers2.at(pass);
        buf = Core::CommandBuffer(vk.device, vk.commandPool);
        buf.begin();

        // generate consumes the output of the last gamma and delta steps,
        // so the complete chain has to be recorded for every pass
        for (size_t step = 0; step < this->gamma.size(); step++) {
            this->gamma.at(step).Dispatch(buf, this->frameIdx, pass);
            if (step >= 4)
                this->delta.at(step - 4).Dispatch(buf, this->frameIdx, pass);
        }
        this->generate.Dispatch(buf, this->frameIdx, pass);

        buf.end();
        // wait on this pass's internal semaphore, signal only this pass's out
        // semaphore and attach the fence that the next use of the slot waits on
        buf.submit(vk.device.getComputeQueue(), slot.completionFences.at(pass),
            { slot.internalSemaphores.at(pass) }, std::nullopt,
            { slot.outSemaphores.at(pass) }, std::nullopt);
    }

    slot.shouldWait = true;
    this->frameIdx++;
}

View file

@ -12,7 +12,7 @@ const std::vector<const char*> requiredExtensions = {
"VK_EXT_robustness2",
};
Device::Device(const Instance& instance) {
Device::Device(const Instance& instance, uint64_t deviceUUID) {
// get all physical devices
uint32_t deviceCount{};
auto res = vkEnumeratePhysicalDevices(instance.handle(), &deviceCount, nullptr);
@ -24,20 +24,14 @@ Device::Device(const Instance& instance) {
if (res != VK_SUCCESS)
throw LSFG::vulkan_error(res, "Failed to get physical devices");
// get uuid env vars
const char* deviceUUIDEnv = std::getenv("LSFG_DEVICE_UUID");
if (!deviceUUIDEnv)
throw LSFG::vulkan_error(VK_ERROR_INITIALIZATION_FAILED,
"LSFG_DEVICE_UUID environment variable not set");
const uint64_t deviceUUID = std::stoull(deviceUUIDEnv);
// find first discrete GPU
// get device by uuid
std::optional<VkPhysicalDevice> physicalDevice;
for (const auto& device : devices) {
VkPhysicalDeviceProperties properties;
vkGetPhysicalDeviceProperties(device, &properties);
const uint64_t uuid = static_cast<uint64_t>(properties.vendorID) << 32 | properties.deviceID;
const uint64_t uuid =
static_cast<uint64_t>(properties.vendorID) << 32 | properties.deviceID;
if (deviceUUID == uuid) {
physicalDevice = device;
break;

View file

@ -1,9 +1,13 @@
#include "core/sampler.hpp"
#include "lsfg.hpp"
#include <vulkan/vulkan_core.h>
using namespace LSFG::Core;
Sampler::Sampler(const Core::Device& device, VkSamplerAddressMode mode) {
Sampler::Sampler(const Core::Device& device,
VkSamplerAddressMode mode,
VkCompareOp compare,
bool isWhite) {
// create sampler
const VkSamplerCreateInfo desc{
.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
@ -13,8 +17,11 @@ Sampler::Sampler(const Core::Device& device, VkSamplerAddressMode mode) {
.addressModeU = mode,
.addressModeV = mode,
.addressModeW = mode,
.compareOp = mode == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE ? VK_COMPARE_OP_ALWAYS : VK_COMPARE_OP_NEVER,
.maxLod = 15.99609F
.compareOp = compare,
.maxLod = VK_LOD_CLAMP_NONE,
.borderColor =
isWhite ? VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE
: VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK
};
VkSampler samplerHandle{};
auto res = vkCreateSampler(device.handle(), &desc, nullptr, &samplerHandle);

100
lsfg-vk-v3.1/src/lsfg.cpp Normal file
View file

@ -0,0 +1,100 @@
#include "lsfg.hpp"
#include "context.hpp"
#include "core/commandpool.hpp"
#include "core/descriptorpool.hpp"
#include "core/instance.hpp"
#include "pool/shaderpool.hpp"
#include "utils/utils.hpp"
#include <vulkan/vulkan_core.h>
#include <cstdint>
#include <cstdlib>
#include <ctime>
#include <format>
#include <optional>
#include <functional>
#include <string>
#include <stdexcept>
#include <unordered_map>
#include <vector>
using namespace LSFG;
namespace {
// Library-wide state; instance and device are filled by LSFG::initialize()
// and cleared again by LSFG::finalize().
std::optional<Core::Instance> instance;
std::optional<Vulkan> device;
// Live contexts, keyed by the id handed out by LSFG::createContext().
std::unordered_map<int32_t, Context> contexts;
}
///
/// Initialize the library-wide Vulkan state.
///
/// Does nothing when the library is already initialized. If any step fails,
/// everything created so far is torn down again before the exception is
/// rethrown, so that a later call can retry from a clean state.
///
/// @param deviceUUID The UUID of the Vulkan device to use.
/// @param isHdr Whether the images are in HDR format.
/// @param flowScale Internal flow scale factor.
/// @param generationCount Number of frames to generate.
/// @param loader Function to load shader source code by name.
///
/// @throws LSFG::vulkan_error if Vulkan objects fail to initialize.
///
void LSFG::initialize(uint64_t deviceUUID,
        bool isHdr, float flowScale, uint64_t generationCount,
        const std::function<std::vector<uint8_t>(const std::string&)>& loader) {
    if (instance.has_value() || device.has_value())
        return;

    try {
        instance.emplace();
        device.emplace(Vulkan {
            .device{*instance, deviceUUID},
            .generationCount = generationCount,
            .flowScale = flowScale,
            .isHdr = isHdr
        });

        device->commandPool = Core::CommandPool(device->device);
        device->descriptorPool = Core::DescriptorPool(device->device);

        device->resources = Pool::ResourcePool(device->isHdr, device->flowScale);
        device->shaders = Pool::ShaderPool(loader);
    } catch (...) {
        // never leave a half-initialized state behind: the early return above
        // would otherwise turn every following initialize() into a no-op
        device.reset();
        instance.reset();
        throw;
    }

    std::srand(static_cast<uint32_t>(std::time(nullptr)));
}
///
/// Create a new frame generation context and register it under a fresh id.
///
/// @param in0 File descriptor for the first input image.
/// @param in1 File descriptor for the second input image.
/// @param outN File descriptor for each output image.
/// @param extent The size of the images.
/// @param format The format of the images.
/// @return Identifier of the created context; never the id of a context that is still alive.
///
/// @throws LSFG::vulkan_error if the library is not initialized or the context cannot be created.
///
int32_t LSFG::createContext(
        int in0, int in1, const std::vector<int>& outN,
        VkExtent2D extent, VkFormat format) {
    if (!instance.has_value() || !device.has_value())
        throw vulkan_error(VK_ERROR_INITIALIZATION_FAILED, "LSFG not initialized");

    // ids are random; redraw on collision, otherwise emplace() would silently
    // drop the new context and the caller would end up sharing an existing one
    int32_t id = std::rand();
    while (contexts.find(id) != contexts.end())
        id = std::rand();

    contexts.emplace(id, Context(*device, in0, in1, outN, extent, format));
    return id;
}
void LSFG::presentContext(int32_t id, int inSem, const std::vector<int>& outSem) {
if (!instance.has_value() || !device.has_value())
throw vulkan_error(VK_ERROR_INITIALIZATION_FAILED, "LSFG not initialized");
auto it = contexts.find(id);
if (it == contexts.end())
throw vulkan_error(VK_ERROR_UNKNOWN, "Context not found");
it->second.present(*device, inSem, outSem);
}
///
/// Destroy the context registered under the given id.
///
/// The device is idled before the context is erased.
///
/// @param id Identifier returned by createContext().
///
/// @throws LSFG::vulkan_error if the library is not initialized or the id is unknown.
///
void LSFG::deleteContext(int32_t id) {
    const bool initialized = instance.has_value() && device.has_value();
    if (!initialized)
        throw vulkan_error(VK_ERROR_INITIALIZATION_FAILED, "LSFG not initialized");

    const auto entry = contexts.find(id);
    // NOTE(review): presentContext() reports an unknown id with VK_ERROR_UNKNOWN,
    // this function with VK_ERROR_DEVICE_LOST - confirm whether that is intended.
    if (entry == contexts.end())
        throw vulkan_error(VK_ERROR_DEVICE_LOST, "No such context");

    vkDeviceWaitIdle(device->device.handle());
    contexts.erase(entry);
}
///
/// Tear down the library-wide state.
///
/// Does nothing unless both the instance and the device exist. Otherwise the
/// device is idled, then all contexts, the device and the instance are
/// released in that order.
///
void LSFG::finalize() {
    const bool initialized = instance.has_value() && device.has_value();
    if (!initialized)
        return;

    vkDeviceWaitIdle(device->device.handle());

    contexts.clear();
    device.reset();
    instance.reset();
}
///
/// Build the exception; what() reads "<message> (error <numeric VkResult>)".
///
/// @param result Vulkan result code stored alongside the message.
/// @param message Human readable description of the failure.
///
vulkan_error::vulkan_error(VkResult result, const std::string& message)
    : std::runtime_error(
          std::format("{} (error {})", message, static_cast<int32_t>(result))),
      result(result) {
}

/// Defined out of line in this translation unit.
vulkan_error::~vulkan_error() noexcept = default;

View file

@ -0,0 +1,67 @@
#include "pool/resourcepool.hpp"
#include "core/buffer.hpp"
#include <array>
#include <cstdint>
#include <cstring>
using namespace LSFG;
using namespace LSFG::Pool;
namespace {
    ///
    /// Layout of the constant buffer uploaded by ResourcePool::getBuffer().
    ///
    /// Kept in an anonymous namespace so the name cannot clash with a
    /// different ConstantBuffer defined in another translation unit.
    ///
    struct ConstantBuffer {
        std::array<uint32_t, 2> inputOffset;
        uint32_t firstIter;
        uint32_t firstIterS;
        uint32_t advancedColorKind;
        uint32_t hdrSupport;
        float resolutionInvScale;
        float timestamp;
        float uiThreshold;
        std::array<uint32_t, 3> pad; // explicit padding up to 48 bytes
    };

    // the buffer is uploaded as raw bytes, so its size must not change by accident
    static_assert(sizeof(ConstantBuffer) == 48, "ConstantBuffer must be 48 bytes in size.");
}
///
/// Retrieve the constant buffer for the given parameters, creating and
/// caching it on first use.
///
/// @param device Vulkan device used when a new buffer has to be created
/// @param timestamp Timestamp stored in buffer
/// @param firstIter First iteration flag stored in buffer
/// @param firstIterS First special iteration flag stored in buffer
/// @return Created or cached buffer
///
/// @throws LSFG::vulkan_error if the buffer cannot be created.
///
Core::Buffer ResourcePool::getBuffer(
        const Core::Device& device,
        float timestamp, bool firstIter, bool firstIterS) {
    // cache key: raw bits of the timestamp in the low 32 bits, flags above them
    // (memcpy instead of a union read, which is undefined behavior in C++)
    uint32_t timestampBits{};
    static_assert(sizeof(timestampBits) == sizeof(timestamp),
        "float is expected to be 32 bits wide");
    std::memcpy(&timestampBits, &timestamp, sizeof(timestampBits));

    uint64_t hash = timestampBits;
    hash |= static_cast<uint64_t>(firstIter) << 32;
    hash |= static_cast<uint64_t>(firstIterS) << 33;

    auto it = buffers.find(hash);
    if (it != buffers.end())
        return it->second;

    // create the buffer; the iteration flags are part of the key and therefore
    // have to end up in the buffer contents as well
    const ConstantBuffer data{
        .inputOffset = { 0, 0 },
        .firstIter = firstIter ? 1U : 0U,
        .firstIterS = firstIterS ? 1U : 0U,
        .advancedColorKind = this->isHdr ? 2U : 0U,
        .hdrSupport = this->isHdr,
        .resolutionInvScale = this->flowScale,
        .timestamp = timestamp,
        .uiThreshold = 0.5F,
    };
    Core::Buffer buffer(device, data, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
    buffers[hash] = buffer;
    return buffer;
}
///
/// Retrieve the sampler for the given parameters, creating and caching it
/// on first use.
///
/// @param device Vulkan device used when a new sampler has to be created
/// @param type Address mode of the sampler
/// @param compare Compare operation for the sampler
/// @param isWhite Whether the border color is white
/// @return Created or cached sampler
///
/// @throws LSFG::vulkan_error if the sampler cannot be created.
///
Core::Sampler ResourcePool::getSampler(
        const Core::Device& device,
        VkSamplerAddressMode type,
        VkCompareOp compare,
        bool isWhite) {
    // cache key: address mode from bit 0, compare op from bit 8, border flag at bit 16
    const uint64_t key =
          (static_cast<uint64_t>(type) << 0)
        | (static_cast<uint64_t>(compare) << 8)
        | (static_cast<uint64_t>(isWhite) << 16);

    if (const auto cached = samplers.find(key); cached != samplers.end())
        return cached->second;

    // not cached yet: create the sampler and remember it
    Core::Sampler created(device, type, compare, isWhite);
    samplers[key] = created;
    return created;
}

View file

@ -0,0 +1,44 @@
#include "pool/shaderpool.hpp"
#include "utils/trans.hpp"
#include <stdexcept>
using namespace LSFG;
using namespace LSFG::Pool;
///
/// Retrieve a shader module by name, loading, translating and caching it on
/// first use.
///
/// The cache is keyed by name only: on a cache hit `types` is not looked at.
///
/// @param device Vulkan device
/// @param name Name of the shader module
/// @param types Descriptor types for the shader module
/// @return Shader module
///
/// @throws std::runtime_error if the source function returns no code or the
///         translation yields an empty result.
///
Core::ShaderModule ShaderPool::getShader(
        const Core::Device& device, const std::string& name,
        const std::vector<std::pair<size_t, VkDescriptorType>>& types) {
    if (const auto cached = shaders.find(name); cached != shaders.end())
        return cached->second;

    // grab the shader
    auto rawCode = this->source(name);
    if (rawCode.empty())
        throw std::runtime_error("Shader code is empty: " + name);

    // translate it; an empty result is treated as a translation failure
    auto spirvCode = Utils::Trans::translateShader(rawCode);
    if (spirvCode.empty())
        throw std::runtime_error("Shader code translation failed: " + name);

    // create the shader module and remember it
    Core::ShaderModule module(device, spirvCode, types);
    shaders[name] = module;
    return module;
}
///
/// Retrieve a pipeline by shader name, creating and caching it on first use.
///
/// The shader module is fetched through getShader() with an empty descriptor
/// type list, so a shader that is not cached yet is created without any
/// descriptor types.
///
/// @param device Vulkan device
/// @param name Name of the shader module
/// @return Created or cached pipeline
///
Core::Pipeline ShaderPool::getPipeline(
        const Core::Device& device, const std::string& name) {
    if (const auto cached = pipelines.find(name); cached != pipelines.end())
        return cached->second;

    // grab the shader module
    auto module = this->getShader(device, name, {});

    // create the pipeline and remember it
    Core::Pipeline created(device, module);
    pipelines[name] = created;
    return created;
}

View file

@ -0,0 +1,130 @@
#include "shaders/alpha.hpp"
using namespace LSFG::Shaders;
/// Set up the Alpha shader chain for one input image.
///
/// Fetches the "alpha[0]".."alpha[3]" shader modules and pipelines from
/// vk.shaders and a sampler from vk.resources, allocates the descriptor sets
/// and the intermediate/output images, and writes every descriptor set up
/// front so that Dispatch() only has to bind them.
///
/// @param vk    Shared state; uses vk.shaders, vk.resources, vk.device and vk.descriptorPool.
/// @param inImg Input image; moved into this->inImg and sampled by the first shader.
Alpha::Alpha(Vulkan& vk, Core::Image inImg) : inImg(std::move(inImg)) {
// create resources
// NOTE(review): each pair looks like (descriptor count, descriptor type); the
// counts line up with the resources added to the descriptor sets further down.
this->shaderModules = {{
vk.shaders.getShader(vk.device, "alpha[0]",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
vk.shaders.getShader(vk.device, "alpha[1]",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
vk.shaders.getShader(vk.device, "alpha[2]",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
vk.shaders.getShader(vk.device, "alpha[3]",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } })
}};
// pipelines are requested after the modules above, so the pool already has
// the modules cached under the same names
this->pipelines = {{
vk.shaders.getPipeline(vk.device, "alpha[0]"),
vk.shaders.getPipeline(vk.device, "alpha[1]"),
vk.shaders.getPipeline(vk.device, "alpha[2]"),
vk.shaders.getPipeline(vk.device, "alpha[3]")
}};
this->sampler = vk.resources.getSampler(vk.device);
// shaders 0-2 get one descriptor set each
for (size_t i = 0; i < 3; i++)
this->descriptorSets.at(i) = Core::DescriptorSet(vk.device, vk.descriptorPool, this->shaderModules.at(i));
// shader 3 gets three descriptor sets, one per slot of outImgs
for (size_t i = 0; i < 3; i++)
this->lastDescriptorSet.at(i) = Core::DescriptorSet(vk.device, vk.descriptorPool, this->shaderModules.at(3));
// create internal images/outputs
const VkExtent2D extent = this->inImg.getExtent();
// (n + 1) >> 1 halves the dimension, rounding up
const VkExtent2D halfExtent = {
.width = (extent.width + 1) >> 1,
.height = (extent.height + 1) >> 1
};
for (size_t i = 0; i < 2; i++) {
this->tempImgs1.at(i) = Core::Image(vk.device, halfExtent);
this->tempImgs2.at(i) = Core::Image(vk.device, halfExtent);
}
// halve once more (rounding up) for the last two shaders
const VkExtent2D quarterExtent = {
.width = (halfExtent.width + 1) >> 1,
.height = (halfExtent.height + 1) >> 1
};
// four quarter-size temporaries plus three slots of four output images each
for (size_t i = 0; i < 4; i++) {
this->tempImgs3.at(i) = Core::Image(vk.device, quarterExtent);
for (size_t j = 0; j < 3; j++)
this->outImgs.at(j).at(i) = Core::Image(vk.device, quarterExtent);
}
// hook up shaders
// shader 0: inImg -> tempImgs1
this->descriptorSets.at(0).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->sampler)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
.build();
// shader 1: tempImgs1 -> tempImgs2
this->descriptorSets.at(1).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->sampler)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
.build();
// shader 2: tempImgs2 -> tempImgs3
this->descriptorSets.at(2).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->sampler)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs3)
.build();
// shader 3: tempImgs3 -> outImgs slot i; Dispatch() picks the slot via frameCount % 3
for (size_t i = 0; i < 3; i++)
this->lastDescriptorSet.at(i).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->sampler)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs3)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs.at(i))
.build();
}
/// Record the four Alpha shaders into a command buffer.
///
/// Each step emits a barrier (inputs write-to-read, outputs read-to-write),
/// binds the pipeline and its descriptor set, and dispatches a 2D grid.
///
/// @param buf        Command buffer that receives the barriers, binds and dispatches.
/// @param frameCount Frame counter; `frameCount % 3` selects the output image
///                   slot and the matching descriptor set of the last shader.
void Alpha::Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount) {
    // number of groups needed to cover `pixels` in steps of 8, rounded up
    const auto groupsFor = [](uint32_t pixels) -> uint32_t { return (pixels + 7) / 8; };
    // bind pipeline and descriptor set, then launch an x-by-y grid
    const auto run = [&buf](auto& pipeline, auto& set, uint32_t x, uint32_t y) {
        pipeline.bind(buf);
        set.bind(buf, pipeline);
        buf.dispatch(x, y, 1);
    };

    // shaders 0 and 1 cover the half-size images
    const auto half = tempImgs1.at(0).getExtent();
    const uint32_t halfX = groupsFor(half.width);
    const uint32_t halfY = groupsFor(half.height);

    // shader 0: inImg -> tempImgs1
    Utils::BarrierBuilder(buf)
        .addW2R(inImg)
        .addR2W(tempImgs1)
        .build();
    run(pipelines.at(0), descriptorSets.at(0), halfX, halfY);

    // shader 1: tempImgs1 -> tempImgs2
    Utils::BarrierBuilder(buf)
        .addW2R(tempImgs1)
        .addR2W(tempImgs2)
        .build();
    run(pipelines.at(1), descriptorSets.at(1), halfX, halfY);

    // shaders 2 and 3 cover the quarter-size images
    const auto quarter = tempImgs3.at(0).getExtent();
    const uint32_t quarterX = groupsFor(quarter.width);
    const uint32_t quarterY = groupsFor(quarter.height);

    // shader 2: tempImgs2 -> tempImgs3
    Utils::BarrierBuilder(buf)
        .addW2R(tempImgs2)
        .addR2W(tempImgs3)
        .build();
    run(pipelines.at(2), descriptorSets.at(2), quarterX, quarterY);

    // shader 3: tempImgs3 -> output slot chosen by the frame counter
    Utils::BarrierBuilder(buf)
        .addW2R(tempImgs3)
        .addR2W(outImgs.at(frameCount % 3))
        .build();
    run(pipelines.at(3), lastDescriptorSet.at(frameCount % 3), quarterX, quarterY);
}

View file

@ -0,0 +1,151 @@
#include "shaders/beta.hpp"
using namespace LSFG::Shaders;
/// Set up the Beta shader chain on top of three slots of four input images.
///
/// Fetches the "beta[0]".."beta[4]" shader modules and pipelines, two samplers
/// and a constant buffer, allocates descriptor sets and intermediate/output
/// images, and writes every descriptor set up front for Dispatch().
///
/// @param vk     Shared state; uses vk.shaders, vk.resources, vk.device and vk.descriptorPool.
/// @param inImgs Three slots of four images each; moved into this->inImgs.
Beta::Beta(Vulkan& vk, std::array<std::array<Core::Image, 4>, 3> inImgs)
: inImgs(std::move(inImgs)) {
// create resources
// NOTE(review): each pair looks like (descriptor count, descriptor type); the
// counts line up with the resources added to the descriptor sets further down
// (e.g. 12 sampled images = three slots of four input images).
this->shaderModules = {{
vk.shaders.getShader(vk.device, "beta[0]",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 12, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
vk.shaders.getShader(vk.device, "beta[1]",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
vk.shaders.getShader(vk.device, "beta[2]",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
vk.shaders.getShader(vk.device, "beta[3]",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
vk.shaders.getShader(vk.device, "beta[4]",
{ { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
{ 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 6, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } })
}};
// pipelines are requested after the modules above, so the pool already has
// the modules cached under the same names
this->pipelines = {{
vk.shaders.getPipeline(vk.device, "beta[0]"),
vk.shaders.getPipeline(vk.device, "beta[1]"),
vk.shaders.getPipeline(vk.device, "beta[2]"),
vk.shaders.getPipeline(vk.device, "beta[3]"),
vk.shaders.getPipeline(vk.device, "beta[4]")
}};
// samplers.at(0): pool defaults; samplers.at(1): clamp-to-border variant used by shader 0
this->samplers.at(0) = vk.resources.getSampler(vk.device);
this->samplers.at(1) = vk.resources.getSampler(vk.device,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, VK_COMPARE_OP_NEVER, true);
// shader 0 gets three descriptor sets, one per input slot rotation
for (size_t i = 0; i < 3; i++)
this->firstDescriptorSet.at(i) = Core::DescriptorSet(vk.device, vk.descriptorPool, this->shaderModules.at(0));
// shaders 1-4 get one descriptor set each (descriptorSets.at(i) <-> shaderModules.at(i + 1))
for (size_t i = 0; i < 4; i++)
this->descriptorSets.at(i) = Core::DescriptorSet(vk.device, vk.descriptorPool, this->shaderModules.at(i + 1));
// NOTE(review): 0.5F is the second getBuffer argument; its meaning is defined
// by ResourcePool::getBuffer, whose signature is not visible here.
this->buffer = vk.resources.getBuffer(vk.device, 0.5F);
// create internal images/outputs
const VkExtent2D extent = this->inImgs.at(0).at(0).getExtent();
for (size_t i = 0; i < 2; i++) {
this->tempImgs1.at(i) = Core::Image(vk.device, extent);
this->tempImgs2.at(i) = Core::Image(vk.device, extent);
}
// six single-channel outputs; level i has both dimensions shifted right by i
for (size_t i = 0; i < 6; i++)
this->outImgs.at(i) = Core::Image(vk.device,
{ extent.width >> i, extent.height >> i },
VK_FORMAT_R8_UNORM);
// hook up shaders
// shader 0: all three input slots, bound in the order (i+1)%3, (i+2)%3, i%3 -> tempImgs1
for (size_t i = 0; i < 3; i++) {
this->firstDescriptorSet.at(i).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(1))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs.at((i + 1) % 3))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs.at((i + 2) % 3))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs.at(i % 3))
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
.build();
}
// shader 1: tempImgs1 -> tempImgs2
this->descriptorSets.at(0).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
.build();
// shader 2: tempImgs2 -> tempImgs1
this->descriptorSets.at(1).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
.build();
// shader 3: tempImgs1 -> tempImgs2
this->descriptorSets.at(2).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
.build();
// shader 4: constant buffer + tempImgs2 -> all six outImgs
this->descriptorSets.at(3).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, this->buffer)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs)
.build();
}
/// Record the five Beta shaders into a command buffer.
///
/// Each step emits a barrier (inputs write-to-read, outputs read-to-write),
/// binds the pipeline and its descriptor set, and dispatches a 2D grid.
///
/// @param buf        Command buffer that receives the barriers, binds and dispatches.
/// @param frameCount Frame counter; `frameCount % 3` selects which of the three
///                   first-shader descriptor sets is bound.
void Beta::Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount) {
    // number of groups needed to cover `pixels` in steps of `tile`, rounded up
    const auto groupsFor = [](uint32_t pixels, uint32_t tile) -> uint32_t {
        return (pixels + tile - 1) / tile;
    };
    // bind pipeline and descriptor set, then launch an x-by-y grid
    const auto run = [&buf](auto& pipeline, auto& set, uint32_t x, uint32_t y) {
        pipeline.bind(buf);
        set.bind(buf, pipeline);
        buf.dispatch(x, y, 1);
    };

    const auto size = tempImgs1.at(0).getExtent();
    const uint32_t groupsX = groupsFor(size.width, 8);
    const uint32_t groupsY = groupsFor(size.height, 8);

    // shader 0: all three input slots -> tempImgs1
    Utils::BarrierBuilder(buf)
        .addW2R(inImgs.at(0))
        .addW2R(inImgs.at(1))
        .addW2R(inImgs.at(2))
        .addR2W(tempImgs1)
        .build();
    run(pipelines.at(0), firstDescriptorSet.at(frameCount % 3), groupsX, groupsY);

    // shader 1: tempImgs1 -> tempImgs2
    Utils::BarrierBuilder(buf)
        .addW2R(tempImgs1)
        .addR2W(tempImgs2)
        .build();
    run(pipelines.at(1), descriptorSets.at(0), groupsX, groupsY);

    // shader 2: tempImgs2 -> tempImgs1
    Utils::BarrierBuilder(buf)
        .addW2R(tempImgs2)
        .addR2W(tempImgs1)
        .build();
    run(pipelines.at(2), descriptorSets.at(1), groupsX, groupsY);

    // shader 3: tempImgs1 -> tempImgs2
    Utils::BarrierBuilder(buf)
        .addW2R(tempImgs1)
        .addR2W(tempImgs2)
        .build();
    run(pipelines.at(3), descriptorSets.at(2), groupsX, groupsY);

    // shader 4: tempImgs2 -> outImgs, covered in steps of 32 instead of 8
    const uint32_t wideX = groupsFor(size.width, 32);
    const uint32_t wideY = groupsFor(size.height, 32);
    Utils::BarrierBuilder(buf)
        .addW2R(tempImgs2)
        .addR2W(outImgs)
        .build();
    run(pipelines.at(4), descriptorSets.at(3), wideX, wideY);
}

View file

@ -0,0 +1,329 @@
#include "shaders/delta.hpp"
using namespace LSFG::Shaders;
/// Set up the Delta shader chain.
///
/// Fetches the "delta[0]".."delta[9]" shader modules and pipelines and three
/// samplers, allocates the intermediate/output images, and then builds one
/// complete group of descriptor sets (plus a constant buffer) per generated
/// frame, i.e. vk.generationCount groups stored in this->passes.
///
/// @param vk      Shared state; uses vk.shaders, vk.resources, vk.device,
///                vk.descriptorPool and vk.generationCount.
/// @param inImgs1 Three slots of four images each; moved into this->inImgs1.
/// @param inImg2  Single image sampled by shader 4; moved into this->inImg2.
/// @param optImg1 Optional image sampled by shaders 0, 4 and 5.
/// @param optImg2 Optional image sampled by shader 5.
/// @param optImg3 Optional image sampled by shader 9.
///
/// NOTE(review): the optionals are handed to add() as-is; how an empty optional
/// is bound is decided by the descriptor-set builder, which is not visible here.
Delta::Delta(Vulkan& vk, std::array<std::array<Core::Image, 4>, 3> inImgs1,
Core::Image inImg2,
std::optional<Core::Image> optImg1,
std::optional<Core::Image> optImg2,
std::optional<Core::Image> optImg3)
: inImgs1(std::move(inImgs1)), inImg2(std::move(inImg2)),
optImg1(std::move(optImg1)), optImg2(std::move(optImg2)),
optImg3(std::move(optImg3)) {
// create resources
// NOTE(review): each pair looks like (descriptor count, descriptor type); the
// counts line up with the resources added to the descriptor sets further down
// (e.g. 9 sampled images = two slots of four input images + optImg1).
this->shaderModules = {{
vk.shaders.getShader(vk.device, "delta[0]",
{ { 1 , VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 9, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 3, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
vk.shaders.getShader(vk.device, "delta[1]",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 3, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
vk.shaders.getShader(vk.device, "delta[2]",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
vk.shaders.getShader(vk.device, "delta[3]",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
vk.shaders.getShader(vk.device, "delta[4]",
{ { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 6, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
vk.shaders.getShader(vk.device, "delta[5]",
{ { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 10, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
vk.shaders.getShader(vk.device, "delta[6]",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
vk.shaders.getShader(vk.device, "delta[7]",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
vk.shaders.getShader(vk.device, "delta[8]",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
vk.shaders.getShader(vk.device, "delta[9]",
{ { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 3, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } })
}};
// pipelines are requested after the modules above, so the pool already has
// the modules cached under the same names
this->pipelines = {{
vk.shaders.getPipeline(vk.device, "delta[0]"),
vk.shaders.getPipeline(vk.device, "delta[1]"),
vk.shaders.getPipeline(vk.device, "delta[2]"),
vk.shaders.getPipeline(vk.device, "delta[3]"),
vk.shaders.getPipeline(vk.device, "delta[4]"),
vk.shaders.getPipeline(vk.device, "delta[5]"),
vk.shaders.getPipeline(vk.device, "delta[6]"),
vk.shaders.getPipeline(vk.device, "delta[7]"),
vk.shaders.getPipeline(vk.device, "delta[8]"),
vk.shaders.getPipeline(vk.device, "delta[9]")
}};
// samplers.at(0): pool defaults; at(1): clamp-to-border; at(2): clamp-to-edge
this->samplers.at(0) = vk.resources.getSampler(vk.device);
this->samplers.at(1) = vk.resources.getSampler(vk.device,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, VK_COMPARE_OP_NEVER, true);
this->samplers.at(2) = vk.resources.getSampler(vk.device,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_COMPARE_OP_ALWAYS, false);
// create internal images/outputs
// every image of this chain shares the extent of the first input image
const VkExtent2D extent = this->inImgs1.at(0).at(0).getExtent();
for (size_t i = 0; i < 4; i++) {
this->tempImgs1.at(i) = Core::Image(vk.device, extent);
this->tempImgs2.at(i) = Core::Image(vk.device, extent);
}
this->outImg1 = Core::Image(vk.device,
{ extent.width, extent.height },
VK_FORMAT_R16G16B16A16_SFLOAT);
this->outImg2 = Core::Image(vk.device,
{ extent.width, extent.height },
VK_FORMAT_R16G16B16A16_SFLOAT);
// hook up shaders
// one group of descriptor sets per generated frame; all groups share the same images
for (size_t pass_idx = 0; pass_idx < vk.generationCount; pass_idx++) {
auto& pass = this->passes.emplace_back();
// the float is (pass_idx + 1) / (generationCount + 1), i.e. evenly spaced
// fractions strictly between 0 and 1.
// NOTE(review): the meaning of the two trailing flags is defined by
// ResourcePool::getBuffer, whose signature is not visible here.
pass.buffer = vk.resources.getBuffer(vk.device,
static_cast<float>(pass_idx + 1) / static_cast<float>(vk.generationCount + 1),
false, !this->optImg1.has_value());
// shader 0: input slots (i+2)%3 and i%3 + optImg1 -> tempImgs1[0..2];
// one set per slot rotation, Dispatch() picks it via frameCount % 3
for (size_t i = 0; i < 3; i++) {
pass.firstDescriptorSet.at(i) = Core::DescriptorSet(vk.device, vk.descriptorPool,
this->shaderModules.at(0));
pass.firstDescriptorSet.at(i).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, pass.buffer)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(1))
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(2))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs1.at((i + 2) % 3))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs1.at(i % 3))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg1)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(0))
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(1))
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(2))
.build();
}
// shader 1: tempImgs1[0..2] -> tempImgs2
pass.descriptorSets.at(0) = Core::DescriptorSet(vk.device, vk.descriptorPool,
this->shaderModules.at(1));
pass.descriptorSets.at(0).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(1))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(2))
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
.build();
// shader 2: tempImgs2 -> tempImgs1
pass.descriptorSets.at(1) = Core::DescriptorSet(vk.device, vk.descriptorPool,
this->shaderModules.at(2));
pass.descriptorSets.at(1).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
.build();
// shader 3: tempImgs1 -> tempImgs2
pass.descriptorSets.at(2) = Core::DescriptorSet(vk.device, vk.descriptorPool,
this->shaderModules.at(3));
pass.descriptorSets.at(2).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
.build();
// shader 4: tempImgs2 + optImg1 + inImg2 -> outImg1
pass.descriptorSets.at(3) = Core::DescriptorSet(vk.device, vk.descriptorPool,
this->shaderModules.at(4));
pass.descriptorSets.at(3).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, pass.buffer)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(2))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg1)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg2)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImg1)
.build();
// shader 5: input slots (i+2)%3 and i%3 + optImg1 + optImg2 -> tempImgs2[0..1];
// again one set per slot rotation
for (size_t i = 0; i < 3; i++) {
pass.sixthDescriptorSet.at(i) = Core::DescriptorSet(vk.device, vk.descriptorPool,
this->shaderModules.at(5));
pass.sixthDescriptorSet.at(i).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, pass.buffer)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(1))
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(2))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs1.at((i + 2) % 3))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs1.at(i % 3))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg1)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg2)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2.at(0))
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2.at(1))
.build();
}
// shader 6: tempImgs2[0..1] -> tempImgs1[0..1]
pass.descriptorSets.at(4) = Core::DescriptorSet(vk.device, vk.descriptorPool,
this->shaderModules.at(6));
pass.descriptorSets.at(4).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2.at(1))
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(0))
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(1))
.build();
// shader 7: tempImgs1[0..1] -> tempImgs2[0..1]
pass.descriptorSets.at(5) = Core::DescriptorSet(vk.device, vk.descriptorPool,
this->shaderModules.at(7));
pass.descriptorSets.at(5).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(1))
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2.at(0))
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2.at(1))
.build();
// shader 8: tempImgs2[0..1] -> tempImgs1[0..1]
pass.descriptorSets.at(6) = Core::DescriptorSet(vk.device, vk.descriptorPool,
this->shaderModules.at(8));
pass.descriptorSets.at(6).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2.at(1))
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(0))
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(1))
.build();
// shader 9: tempImgs1[0..1] + optImg3 -> outImg2
pass.descriptorSets.at(7) = Core::DescriptorSet(vk.device, vk.descriptorPool,
this->shaderModules.at(9));
pass.descriptorSets.at(7).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, pass.buffer)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(2))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(1))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg3)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImg2)
.build();
}
}
/// Record the ten Delta shaders for one generated frame into a command buffer.
///
/// Each step emits a barrier (inputs write-to-read, outputs read-to-write),
/// binds the pipeline and its descriptor set, and dispatches a 2D grid. All
/// steps use the same grid size, derived from tempImgs1.at(0).
///
/// @param buf        Command buffer that receives the barriers, binds and dispatches.
/// @param frameCount Frame counter; `frameCount % 3` selects the input slot and
///                   the rotating descriptor sets of shaders 0 and 5.
/// @param pass_idx   Index into this->passes; out-of-range values throw from at().
void Delta::Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount, uint64_t pass_idx) {
    auto& pass = passes.at(pass_idx);

    // bind pipeline and descriptor set, then launch an x-by-y grid
    const auto run = [&buf](auto& pipeline, auto& set, uint32_t x, uint32_t y) {
        pipeline.bind(buf);
        set.bind(buf, pipeline);
        buf.dispatch(x, y, 1);
    };

    // groups needed to cover the image in steps of 8, rounded up
    const auto size = tempImgs1.at(0).getExtent();
    const uint32_t groupsX = (size.width + 7) / 8;
    const uint32_t groupsY = (size.height + 7) / 8;

    // the two input slots touched this frame
    const uint64_t slot = frameCount % 3;
    const uint64_t prevSlot = (frameCount + 2) % 3;

    // shader 0: input slots + optImg1 -> tempImgs1[0..2]
    Utils::BarrierBuilder(buf)
        .addW2R(inImgs1.at(prevSlot))
        .addW2R(inImgs1.at(slot))
        .addW2R(optImg1)
        .addR2W(tempImgs1.at(0))
        .addR2W(tempImgs1.at(1))
        .addR2W(tempImgs1.at(2))
        .build();
    run(pipelines.at(0), pass.firstDescriptorSet.at(slot), groupsX, groupsY);

    // shader 1: tempImgs1[0..2] -> tempImgs2
    Utils::BarrierBuilder(buf)
        .addW2R(tempImgs1.at(0))
        .addW2R(tempImgs1.at(1))
        .addW2R(tempImgs1.at(2))
        .addR2W(tempImgs2)
        .build();
    run(pipelines.at(1), pass.descriptorSets.at(0), groupsX, groupsY);

    // shader 2: tempImgs2 -> tempImgs1
    Utils::BarrierBuilder(buf)
        .addW2R(tempImgs2)
        .addR2W(tempImgs1)
        .build();
    run(pipelines.at(2), pass.descriptorSets.at(1), groupsX, groupsY);

    // shader 3: tempImgs1 -> tempImgs2
    Utils::BarrierBuilder(buf)
        .addW2R(tempImgs1)
        .addR2W(tempImgs2)
        .build();
    run(pipelines.at(3), pass.descriptorSets.at(2), groupsX, groupsY);

    // shader 4: tempImgs2 + optImg1 + inImg2 -> outImg1
    Utils::BarrierBuilder(buf)
        .addW2R(tempImgs2)
        .addW2R(optImg1)
        .addW2R(inImg2)
        .addR2W(outImg1)
        .build();
    run(pipelines.at(4), pass.descriptorSets.at(3), groupsX, groupsY);

    // shader 5: input slots + optImg1 + optImg2 -> tempImgs2[0..1]
    Utils::BarrierBuilder(buf)
        .addW2R(inImgs1.at(prevSlot))
        .addW2R(inImgs1.at(slot))
        .addW2R(optImg1)
        .addW2R(optImg2)
        .addR2W(tempImgs2.at(0))
        .addR2W(tempImgs2.at(1))
        .build();
    run(pipelines.at(5), pass.sixthDescriptorSet.at(slot), groupsX, groupsY);

    // shader 6: tempImgs2[0..1] -> tempImgs1[0..1]
    Utils::BarrierBuilder(buf)
        .addW2R(tempImgs2.at(0))
        .addW2R(tempImgs2.at(1))
        .addR2W(tempImgs1.at(0))
        .addR2W(tempImgs1.at(1))
        .build();
    run(pipelines.at(6), pass.descriptorSets.at(4), groupsX, groupsY);

    // shader 7: tempImgs1[0..1] -> tempImgs2[0..1]
    Utils::BarrierBuilder(buf)
        .addW2R(tempImgs1.at(0))
        .addW2R(tempImgs1.at(1))
        .addR2W(tempImgs2.at(0))
        .addR2W(tempImgs2.at(1))
        .build();
    run(pipelines.at(7), pass.descriptorSets.at(5), groupsX, groupsY);

    // shader 8: tempImgs2[0..1] -> tempImgs1[0..1] (barrier also covers optImg3)
    Utils::BarrierBuilder(buf)
        .addW2R(tempImgs2.at(0))
        .addW2R(tempImgs2.at(1))
        .addW2R(optImg3)
        .addR2W(tempImgs1.at(0))
        .addR2W(tempImgs1.at(1))
        .build();
    run(pipelines.at(8), pass.descriptorSets.at(6), groupsX, groupsY);

    // shader 9: tempImgs1[0..1] + optImg3 -> outImg2
    Utils::BarrierBuilder(buf)
        .addW2R(tempImgs1.at(0))
        .addW2R(tempImgs1.at(1))
        .addW2R(optImg3)
        .addR2W(outImg2)
        .build();
    run(pipelines.at(9), pass.descriptorSets.at(7), groupsX, groupsY);
}

View file

@ -0,0 +1,181 @@
#include "shaders/gamma.hpp"
using namespace LSFG::Shaders;
/// Set up the Gamma shader chain.
///
/// Fetches the "gamma[0]".."gamma[4]" shader modules and pipelines and three
/// samplers, allocates the intermediate/output images, and then builds one
/// complete group of descriptor sets (plus a constant buffer) per generated
/// frame, i.e. vk.generationCount groups stored in this->passes.
///
/// @param vk      Shared state; uses vk.shaders, vk.resources, vk.device,
///                vk.descriptorPool and vk.generationCount.
/// @param inImgs1 Three slots of four images each; moved into this->inImgs1.
/// @param inImg2  Single image sampled by shader 4; moved into this->inImg2.
/// @param optImg  Optional image sampled by shaders 0 and 4.
///
/// NOTE(review): the optional is handed to add() as-is; how an empty optional
/// is bound is decided by the descriptor-set builder, which is not visible here.
Gamma::Gamma(Vulkan& vk, std::array<std::array<Core::Image, 4>, 3> inImgs1,
Core::Image inImg2,
std::optional<Core::Image> optImg)
: inImgs1(std::move(inImgs1)), inImg2(std::move(inImg2)),
optImg(std::move(optImg)) {
// create resources
// NOTE(review): each pair looks like (descriptor count, descriptor type); the
// counts line up with the resources added to the descriptor sets further down
// (e.g. 9 sampled images = two slots of four input images + optImg).
this->shaderModules = {{
vk.shaders.getShader(vk.device, "gamma[0]",
{ { 1 , VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 9, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 3, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
vk.shaders.getShader(vk.device, "gamma[1]",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 3, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
vk.shaders.getShader(vk.device, "gamma[2]",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
vk.shaders.getShader(vk.device, "gamma[3]",
{ { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
vk.shaders.getShader(vk.device, "gamma[4]",
{ { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
{ 2, VK_DESCRIPTOR_TYPE_SAMPLER },
{ 6, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
{ 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } })
}};
// pipelines are requested after the modules above, so the pool already has
// the modules cached under the same names
this->pipelines = {{
vk.shaders.getPipeline(vk.device, "gamma[0]"),
vk.shaders.getPipeline(vk.device, "gamma[1]"),
vk.shaders.getPipeline(vk.device, "gamma[2]"),
vk.shaders.getPipeline(vk.device, "gamma[3]"),
vk.shaders.getPipeline(vk.device, "gamma[4]")
}};
// samplers.at(0): pool defaults; at(1): clamp-to-border; at(2): clamp-to-edge
this->samplers.at(0) = vk.resources.getSampler(vk.device);
this->samplers.at(1) = vk.resources.getSampler(vk.device,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, VK_COMPARE_OP_NEVER, true);
this->samplers.at(2) = vk.resources.getSampler(vk.device,
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_COMPARE_OP_ALWAYS, false);
// create internal images/outputs
// every image of this chain shares the extent of the first input image
const VkExtent2D extent = this->inImgs1.at(0).at(0).getExtent();
for (size_t i = 0; i < 4; i++) {
this->tempImgs1.at(i) = Core::Image(vk.device, extent);
this->tempImgs2.at(i) = Core::Image(vk.device, extent);
}
this->outImg = Core::Image(vk.device,
{ extent.width, extent.height },
VK_FORMAT_R16G16B16A16_SFLOAT);
// hook up shaders
// one group of descriptor sets per generated frame; all groups share the same images
for (size_t pass_idx = 0; pass_idx < vk.generationCount; pass_idx++) {
auto& pass = this->passes.emplace_back();
// the float is (pass_idx + 1) / (generationCount + 1), i.e. evenly spaced
// fractions strictly between 0 and 1.
// NOTE(review): the meaning of the trailing flag is defined by
// ResourcePool::getBuffer, whose signature is not visible here.
pass.buffer = vk.resources.getBuffer(vk.device,
static_cast<float>(pass_idx + 1) / static_cast<float>(vk.generationCount + 1),
!this->optImg.has_value());
// shader 0: input slots (i+2)%3 and i%3 + optImg -> tempImgs1[0..2];
// one set per slot rotation, Dispatch() picks it via frameCount % 3
for (size_t i = 0; i < 3; i++) {
pass.firstDescriptorSet.at(i) = Core::DescriptorSet(vk.device, vk.descriptorPool,
this->shaderModules.at(0));
pass.firstDescriptorSet.at(i).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, pass.buffer)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(1))
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(2))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs1.at((i + 2) % 3))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs1.at(i % 3))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(0))
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(1))
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(2))
.build();
}
// shader 1: tempImgs1[0..2] -> tempImgs2
pass.descriptorSets.at(0) = Core::DescriptorSet(vk.device, vk.descriptorPool,
this->shaderModules.at(1));
pass.descriptorSets.at(0).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(1))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(2))
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
.build();
// shader 2: tempImgs2 -> tempImgs1
pass.descriptorSets.at(1) = Core::DescriptorSet(vk.device, vk.descriptorPool,
this->shaderModules.at(2));
pass.descriptorSets.at(1).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
.build();
// shader 3: tempImgs1 -> tempImgs2
pass.descriptorSets.at(2) = Core::DescriptorSet(vk.device, vk.descriptorPool,
this->shaderModules.at(3));
pass.descriptorSets.at(2).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
.build();
// shader 4: tempImgs2 + optImg + inImg2 -> outImg
pass.descriptorSets.at(3) = Core::DescriptorSet(vk.device, vk.descriptorPool,
this->shaderModules.at(4));
pass.descriptorSets.at(3).update(vk.device)
.add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, pass.buffer)
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
.add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(2))
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg)
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg2)
.add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImg)
.build();
}
}
/// Record the five Gamma shaders for one generated frame into a command buffer.
///
/// Each step emits a barrier (inputs write-to-read, outputs read-to-write),
/// binds the pipeline and its descriptor set, and dispatches a 2D grid. All
/// steps use the same grid size, derived from tempImgs1.at(0).
///
/// @param buf        Command buffer that receives the barriers, binds and dispatches.
/// @param frameCount Frame counter; `frameCount % 3` selects the input slot and
///                   the rotating descriptor set of shader 0.
/// @param pass_idx   Index into this->passes; out-of-range values throw from at().
void Gamma::Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount, uint64_t pass_idx) {
    auto& pass = passes.at(pass_idx);

    // bind pipeline and descriptor set, then launch an x-by-y grid
    const auto run = [&buf](auto& pipeline, auto& set, uint32_t x, uint32_t y) {
        pipeline.bind(buf);
        set.bind(buf, pipeline);
        buf.dispatch(x, y, 1);
    };

    // groups needed to cover the image in steps of 8, rounded up
    const auto size = tempImgs1.at(0).getExtent();
    const uint32_t groupsX = (size.width + 7) / 8;
    const uint32_t groupsY = (size.height + 7) / 8;

    // the two input slots touched this frame
    const uint64_t slot = frameCount % 3;
    const uint64_t prevSlot = (frameCount + 2) % 3;

    // shader 0: input slots + optImg -> tempImgs1[0..2]
    Utils::BarrierBuilder(buf)
        .addW2R(inImgs1.at(prevSlot))
        .addW2R(inImgs1.at(slot))
        .addW2R(optImg)
        .addR2W(tempImgs1.at(0))
        .addR2W(tempImgs1.at(1))
        .addR2W(tempImgs1.at(2))
        .build();
    run(pipelines.at(0), pass.firstDescriptorSet.at(slot), groupsX, groupsY);

    // shader 1: tempImgs1[0..2] -> tempImgs2
    Utils::BarrierBuilder(buf)
        .addW2R(tempImgs1.at(0))
        .addW2R(tempImgs1.at(1))
        .addW2R(tempImgs1.at(2))
        .addR2W(tempImgs2)
        .build();
    run(pipelines.at(1), pass.descriptorSets.at(0), groupsX, groupsY);

    // shader 2: tempImgs2 -> tempImgs1
    Utils::BarrierBuilder(buf)
        .addW2R(tempImgs2)
        .addR2W(tempImgs1)
        .build();
    run(pipelines.at(2), pass.descriptorSets.at(1), groupsX, groupsY);

    // shader 3: tempImgs1 -> tempImgs2
    Utils::BarrierBuilder(buf)
        .addW2R(tempImgs1)
        .addR2W(tempImgs2)
        .build();
    run(pipelines.at(3), pass.descriptorSets.at(2), groupsX, groupsY);

    // shader 4: tempImgs2 + optImg + inImg2 -> outImg
    Utils::BarrierBuilder(buf)
        .addW2R(tempImgs2)
        .addW2R(optImg)
        .addW2R(inImg2)
        .addR2W(outImg)
        .build();
    run(pipelines.at(4), pass.descriptorSets.at(3), groupsX, groupsY);
}

View file

@ -0,0 +1,73 @@
#include "shaders/generate.hpp"
using namespace LSFG::Shaders;
/// Set up the final "generate" shader that writes the generated frames.
///
/// Fetches the "generate" shader module and pipeline and two samplers, creates
/// one output image per generated frame (each constructed with the matching
/// entry of `fds`), and builds, per generated frame, a constant buffer plus
/// two descriptor sets that differ only in the order of inImg1/inImg2.
///
/// @param vk     Shared state; uses vk.shaders, vk.resources, vk.device,
///               vk.descriptorPool and vk.generationCount.
/// @param inImg1 First input image; its extent defines the output extent.
/// @param inImg2 Second input image.
/// @param inImg3 Third sampled input image.
/// @param inImg4 Fourth sampled input image.
/// @param inImg5 Fifth sampled input image.
/// @param fds    One file descriptor per generated frame, passed to the
///               Core::Image constructor of the matching output image.
/// @throws std::out_of_range if `fds` has fewer than vk.generationCount entries.
Generate::Generate(Vulkan& vk,
        Core::Image inImg1, Core::Image inImg2,
        Core::Image inImg3, Core::Image inImg4, Core::Image inImg5,
        const std::vector<int>& fds)
        : inImg1(std::move(inImg1)), inImg2(std::move(inImg2)),
          inImg3(std::move(inImg3)), inImg4(std::move(inImg4)),
          inImg5(std::move(inImg5)) {
    // create resources
    // NOTE(review): each pair looks like (descriptor count, descriptor type);
    // the counts line up with the resources added to the descriptor sets below.
    this->shaderModule = vk.shaders.getShader(vk.device, "generate",
        { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
          { 2, VK_DESCRIPTOR_TYPE_SAMPLER },
          { 5, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
          { 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } });
    this->pipeline = vk.shaders.getPipeline(vk.device, "generate");
    this->samplers.at(0) = vk.resources.getSampler(vk.device);
    this->samplers.at(1) = vk.resources.getSampler(vk.device,
        VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_COMPARE_OP_ALWAYS);

    // create internal images/outputs: one output image per generated frame
    const VkExtent2D extent = this->inImg1.getExtent();
    for (size_t i = 0; i < vk.generationCount; i++)
        this->outImgs.emplace_back(vk.device, extent,
            VK_FORMAT_R8G8B8A8_UNORM,
            VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
            VK_IMAGE_ASPECT_COLOR_BIT, fds.at(i));

    // hook up shaders
    for (size_t i = 0; i < vk.generationCount; i++) {
        auto& pass = this->passes.emplace_back();
        // (i + 1) / (generationCount + 1): evenly spaced fractions in (0, 1)
        pass.buffer = vk.resources.getBuffer(vk.device,
            static_cast<float>(i + 1) / static_cast<float>(vk.generationCount + 1));

        // Two sets per pass, selected in Dispatch() via frameCount % 2. The inner
        // index is deliberately named `j`: it used to shadow the pass index `i`,
        // which made every pass write to outImgs[0]/outImgs[1] (and threw when
        // generationCount == 1) instead of to the image belonging to the pass.
        for (size_t j = 0; j < 2; j++) {
            pass.descriptorSet.at(j) = Core::DescriptorSet(vk.device, vk.descriptorPool,
                this->shaderModule);
            pass.descriptorSet.at(j).update(vk.device)
                .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, pass.buffer)
                .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, j == 0 ? this->inImg1 : this->inImg2)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, j == 0 ? this->inImg2 : this->inImg1)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg3)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg4)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg5)
                // output image of this pass, matching the barrier in Dispatch()
                .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs.at(i))
                .build();
        }
    }
}
/// Record the generate shader for one generated frame into a command buffer.
///
/// @param buf        Command buffer that receives the barrier, binds and dispatch.
/// @param frameCount Frame counter; `frameCount % 2` selects which of the two
///                   descriptor sets of the pass is bound.
/// @param pass_idx   Index into this->passes and this->outImgs; out-of-range
///                   values throw from at().
void Generate::Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount, uint64_t pass_idx) {
    auto& pass = passes.at(pass_idx);

    // groups needed to cover the first input image in steps of 16, rounded up
    const auto size = inImg1.getExtent();
    const uint32_t groupsX = (size.width + 15) / 16;
    const uint32_t groupsY = (size.height + 15) / 16;

    // all five inputs become readable, the output of this pass becomes writable
    Utils::BarrierBuilder(buf)
        .addW2R(inImg1)
        .addW2R(inImg2)
        .addW2R(inImg3)
        .addW2R(inImg4)
        .addW2R(inImg5)
        .addR2W(outImgs.at(pass_idx))
        .build();

    pipeline.bind(buf);
    auto& activeSet = pass.descriptorSet.at(frameCount % 2);
    activeSet.bind(buf, pipeline);
    buf.dispatch(groupsX, groupsY, 1);
}

View file

@ -0,0 +1,56 @@
#include "shaders/mipmaps.hpp"
using namespace LSFG::Shaders;
/// Set up the "mipmaps" shader, which samples one of two input images and
/// writes seven progressively smaller single-channel images.
///
/// @param vk      Shared state; uses vk.shaders, vk.resources, vk.device,
///                vk.descriptorPool and vk.flowScale.
/// @param inImg_0 Input bound to descriptor set 0; also defines the base extent.
/// @param inImg_1 Input bound to descriptor set 1.
Mipmaps::Mipmaps(Vulkan& vk,
        Core::Image inImg_0, Core::Image inImg_1)
        : inImg_0(std::move(inImg_0)), inImg_1(std::move(inImg_1)) {
    // shader module, pipeline, constant buffer and sampler come from the shared pools
    this->shaderModule = vk.shaders.getShader(vk.device, "mipmaps",
        { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
          { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
          { 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
          { 7, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } });
    this->pipeline = vk.shaders.getPipeline(vk.device, "mipmaps");
    this->buffer = vk.resources.getBuffer(vk.device);
    this->sampler = vk.resources.getSampler(vk.device);

    // one descriptor set per input image
    for (size_t set = 0; set < 2; set++)
        this->descriptorSets.at(set) = Core::DescriptorSet(vk.device, vk.descriptorPool, this->shaderModule);

    // base output extent: input extent divided by the flow scale, truncated
    const uint32_t baseWidth = static_cast<uint32_t>(
        static_cast<float>(this->inImg_0.getExtent().width) / vk.flowScale);
    const uint32_t baseHeight = static_cast<uint32_t>(
        static_cast<float>(this->inImg_0.getExtent().height) / vk.flowScale);
    const VkExtent2D flowExtent{ .width = baseWidth, .height = baseHeight };

    // level n has both dimensions shifted right by n
    for (size_t level = 0; level < 7; level++)
        this->outImgs.at(level) = Core::Image(vk.device,
            { flowExtent.width >> level, flowExtent.height >> level },
            VK_FORMAT_R8_UNORM);

    // write both descriptor sets; they differ only in the sampled input image
    for (size_t set = 0; set < 2; set++)
        this->descriptorSets.at(set).update(vk.device)
            .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, this->buffer)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->sampler)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, (set == 0) ? this->inImg_0 : this->inImg_1)
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs)
            .build();
}
///
/// Record the mipmaps shader into a command buffer.
///
/// @param buf Command buffer to record into.
/// @param frameCount Frame counter; its parity selects the input image and descriptor set.
///
void Mipmaps::Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount) {
    // even frames use inImg_0 / set 0, odd frames use inImg_1 / set 1
    const uint64_t parity = frameCount % 2;
    auto& source = (parity == 0) ? this->inImg_0 : this->inImg_1;

    // barriers: the selected input is added as W2R, all outputs as R2W
    Utils::BarrierBuilder(buf)
        .addW2R(source)
        .addR2W(this->outImgs)
        .build();

    this->pipeline.bind(buf);
    this->descriptorSets.at(parity).bind(buf, this->pipeline);

    // dispatch ceil(width / 64) x ceil(height / 64) workgroups,
    // measured on the largest output image
    const auto size = this->outImgs.at(0).getExtent();
    const uint32_t groupsX = (size.width + 63) / 64;
    const uint32_t groupsY = (size.height + 63) / 64;
    buf.dispatch(groupsX, groupsY, 1);
}

View file

@ -0,0 +1,28 @@
#include "utils/trans.hpp"
using namespace LSFG::Utils;
///
/// Translate shader bytecode.
///
/// This is currently a pass-through: the bytes are handed back unchanged.
///
/// @param bytecode Shader bytecode to translate.
/// @return The same bytes that were passed in.
///
std::vector<uint8_t> Trans::translateShader(std::vector<uint8_t> bytecode) {
    // on windows we expect the bytecode to be spir-v already, so simply take
    // over the buffer that was passed in and return it as-is
    std::vector<uint8_t> spirv;
    spirv.swap(bytecode);
    return spirv;

    // Disabled translation path through dxvk, kept for reference.
    // NOTE(review): the trailing '#endif' has no matching '#if' in this
    // function - presumably a leftover of a removed conditional; confirm.
    //
    // // compile the shader
    // dxvk::DxbcReader reader(reinterpret_cast<const char*>(bytecode.data()), bytecode.size());
    // dxvk::DxbcModule module(reader);
    // const dxvk::DxbcModuleInfo info{};
    // auto shader = module.compile(info, "CS");
    // // extract spir-v from d3d11 shader
    // auto code = shader->getRawCode();
    // // patch binding offsets
    // #pragma clang diagnostic push
    // #pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
    // for (size_t i = 0; i < shader->m_bindingOffsets.size(); i++)
    //     code.data()[shader->m_bindingOffsets.at(i).bindingOffset] = static_cast<uint8_t>(i); // NOLINT
    // #pragma clang diagnostic pop
    // std::vector<uint8_t> spirvBytecode(code.size());
    // std::copy_n(reinterpret_cast<uint8_t*>(code.data()),
    //     code.size(), spirvBytecode.data());
    // return spirvBytecode;
    // #endif
}

View file

@ -1,9 +1,21 @@
#include "utils/utils.hpp"
#include "core/buffer.hpp"
#include "core/image.hpp"
#include "core/device.hpp"
#include "core/commandpool.hpp"
#include "core/fence.hpp"
#include "lsfg.hpp"
#include <vulkan/vulkan_core.h>
#include <cstdint>
#include <cerrno>
#include <cstdlib>
#include <fstream>
#include <string>
#include <ios>
#include <system_error>
#include <vector>
using namespace LSFG;
using namespace LSFG::Utils;
@ -176,32 +188,3 @@ void Utils::clearImage(const Core::Device& device, Core::Image& image, bool whit
if (!fence.wait(device))
throw LSFG::vulkan_error(VK_TIMEOUT, "Failed to wait for clearing fence.");
}
// definitions of the static members of Globals; they start out
// default-constructed and are assigned their real values in
// Globals::initializeGlobals()

// sampler created with VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER
Core::Sampler Globals::samplerClampBorder;
// sampler created with VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE
Core::Sampler Globals::samplerClampEdge;
// global constant buffer contents
Globals::FgBuffer Globals::fgBuffer;
///
/// Initialize the global samplers and the global constant buffer.
///
/// HDR is switched on when the LSFG_HDR environment variable is present,
/// regardless of its value.
///
/// @param device Device to create the samplers on.
///
void Globals::initializeGlobals(const Core::Device& device) {
    // initialize global samplers
    samplerClampBorder = Core::Sampler(device, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER);
    samplerClampEdge = Core::Sampler(device, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE);

    // query the environment exactly once, so that advancedColorKind and
    // hdrSupport are always derived from the same lookup
    const bool hdr = getenv("LSFG_HDR") != nullptr;

    // initialize global constant buffer
    fgBuffer = {
        .inputOffset = { 0, 29 },
        .advancedColorKind = hdr ? 2U : 0U,
        .hdrSupport = hdr,
        .resolutionInvScale = 1.0F,
        .timestamp = 0.5F,
        .uiThreshold = 0.1F,
    };
}
///
/// Reset the global samplers and the global constant buffer back to
/// default-constructed values.
///
void Globals::uninitializeGlobals() noexcept {
    // replace both samplers with default-constructed ones
    samplerClampBorder = Core::Sampler{};
    samplerClampEdge = Core::Sampler{};

    // replace the constant buffer contents with a default-constructed buffer
    fgBuffer = Globals::FgBuffer{};
}