diff --git a/CMakeLists.txt b/CMakeLists.txt index cd59fc5..ef8c74a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -16,6 +16,7 @@ include(cmake/FetchPeParse.cmake) add_subdirectory(lsfg-vk-common) add_subdirectory(lsfg-vk-v3.1) +add_subdirectory(lsfg-vk-v3.1p) # main project project(lsfg-vk @@ -38,8 +39,9 @@ set_target_properties(lsfg-vk PROPERTIES CXX_STANDARD_REQUIRED ON) target_include_directories(lsfg-vk PRIVATE include) -target_link_libraries(lsfg-vk - PRIVATE lsfg-vk-common lsfg-vk-v3.1 peparse dxvk vulkan) +target_link_libraries(lsfg-vk PRIVATE + lsfg-vk-common lsfg-vk-v3.1 lsfg-vk-v3.1p + peparse dxvk vulkan) if(CMAKE_BUILD_TYPE STREQUAL "Release") set_target_properties(lsfg-vk PROPERTIES diff --git a/include/context.hpp b/include/context.hpp index 3438e48..37269b2 100644 --- a/include/context.hpp +++ b/include/context.hpp @@ -63,6 +63,7 @@ private: Mini::CommandPool cmdPool; uint64_t frameIdx{0}; + bool isPerfMode{false}; struct RenderPassInfo { Mini::CommandBuffer preCopyBuf; // copy from swapchain image to frame_0/frame_1 diff --git a/lsfg-vk-v3.1/public/lsfg.hpp b/lsfg-vk-v3.1/public/lsfg_3_1.hpp similarity index 100% rename from lsfg-vk-v3.1/public/lsfg.hpp rename to lsfg-vk-v3.1/public/lsfg_3_1.hpp diff --git a/lsfg-vk-v3.1/src/lsfg.cpp b/lsfg-vk-v3.1/src/lsfg.cpp index 7873e8a..75e0992 100644 --- a/lsfg-vk-v3.1/src/lsfg.cpp +++ b/lsfg-vk-v3.1/src/lsfg.cpp @@ -1,4 +1,4 @@ -#include "lsfg.hpp" +#include "lsfg_3_1.hpp" #include "context.hpp" #include "core/commandpool.hpp" #include "core/descriptorpool.hpp" diff --git a/lsfg-vk-v3.1p/.clang-tidy b/lsfg-vk-v3.1p/.clang-tidy new file mode 100644 index 0000000..cf513e9 --- /dev/null +++ b/lsfg-vk-v3.1p/.clang-tidy @@ -0,0 +1,26 @@ +Checks: +# enable basic checks +- "clang-analyzer-*" +# configure performance checks +- "performance-*" +- "-performance-enum-size" +# configure readability and bugprone checks +- "readability-*" +- "bugprone-*" +- "misc-*" +- "-readability-braces-around-statements" +- 
"-readability-function-cognitive-complexity" +- "-readability-identifier-length" +- "-readability-implicit-bool-conversion" +- "-readability-magic-numbers" +- "-readability-math-missing-parentheses" +- "-bugprone-easily-swappable-parameters" +# configure modernization +- "modernize-*" +- "-modernize-use-trailing-return-type" +# configure cppcoreguidelines +- "cppcoreguidelines-*" +- "-cppcoreguidelines-avoid-magic-numbers" +- "-cppcoreguidelines-pro-type-reinterpret-cast" # allows reinterpret_cast +- "-cppcoreguidelines-avoid-non-const-global-variables" +- "-cppcoreguidelines-pro-type-union-access" diff --git a/lsfg-vk-v3.1p/.gitattributes b/lsfg-vk-v3.1p/.gitattributes new file mode 100644 index 0000000..8d476d4 --- /dev/null +++ b/lsfg-vk-v3.1p/.gitattributes @@ -0,0 +1,3 @@ +*.cpp diff=cpp eol=lf +*.hpp diff=cpp eol=lf +*.md diff=markdown eol=lf diff --git a/lsfg-vk-v3.1p/.gitignore b/lsfg-vk-v3.1p/.gitignore new file mode 100644 index 0000000..43ab8ae --- /dev/null +++ b/lsfg-vk-v3.1p/.gitignore @@ -0,0 +1,9 @@ +# cmake files +/build + +# ide/lsp files +/.zed +/.vscode +/.clangd +/.cache +/.ccls diff --git a/lsfg-vk-v3.1p/CMakeLists.txt b/lsfg-vk-v3.1p/CMakeLists.txt new file mode 100644 index 0000000..6e08b62 --- /dev/null +++ b/lsfg-vk-v3.1p/CMakeLists.txt @@ -0,0 +1,62 @@ +cmake_minimum_required(VERSION 3.29) + +# project +project(lsfg-vk-v3.1p + DESCRIPTION "Lossless Scaling Frame Generation v3.1 (Performance Mode)" + LANGUAGES CXX) + +file(GLOB SOURCES + "src/core/*.cpp" + "src/pool/*.cpp" + "src/shaders/*.cpp" + "src/utils/*.cpp" + "src/*.cpp" +) + +add_library(lsfg-vk-v3.1p STATIC ${SOURCES}) + +# target +set_target_properties(lsfg-vk-v3.1p PROPERTIES + CXX_STANDARD 20 + CXX_STANDARD_REQUIRED ON) +target_include_directories(lsfg-vk-v3.1p + PRIVATE include + PUBLIC public) +target_link_libraries(lsfg-vk-v3.1p + PUBLIC lsfg-vk-common vulkan) +target_compile_options(lsfg-vk-v3.1p PRIVATE + -fPIC) + +if(CMAKE_BUILD_TYPE STREQUAL "Release") + 
set_target_properties(lsfg-vk-v3.1p PROPERTIES + INTERPROCEDURAL_OPTIMIZATION ON) +endif() + +# diagnostics +if(CMAKE_BUILD_TYPE STREQUAL "Debug") + set_target_properties(lsfg-vk-v3.1p PROPERTIES + EXPORT_COMPILE_COMMANDS ON) +endif() + +if(LSFGVK_EXCESS_DEBUG) + target_compile_options(lsfg-vk-v3.1p PRIVATE + -Weverything + # disable compat c++ flags + -Wno-pre-c++20-compat-pedantic + -Wno-pre-c++17-compat + -Wno-c++98-compat-pedantic + -Wno-c++98-compat + # disable other flags + -Wno-missing-designated-field-initializers + -Wno-shadow # allow shadowing + -Wno-switch-enum # ignore missing cases + -Wno-switch-default # ignore missing default + -Wno-padded # ignore automatic padding + -Wno-exit-time-destructors # allow globals + -Wno-global-constructors # allow globals + -Wno-cast-function-type-strict # for vulkan + ) + + set_target_properties(lsfg-vk-v3.1p PROPERTIES + CXX_CLANG_TIDY clang-tidy) +endif() diff --git a/lsfg-vk-v3.1p/LICENSE.md b/lsfg-vk-v3.1p/LICENSE.md new file mode 100644 index 0000000..b5c8a3e --- /dev/null +++ b/lsfg-vk-v3.1p/LICENSE.md @@ -0,0 +1,21 @@ +## MIT License + +Copyright (c) 2025 lsfg-vk + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/lsfg-vk-v3.1p/README.md b/lsfg-vk-v3.1p/README.md new file mode 100644 index 0000000..f99ba40 --- /dev/null +++ b/lsfg-vk-v3.1p/README.md @@ -0,0 +1,14 @@ +## lsfg-vk-v3.1p +Version 3.1 (Performance Mode) of Lossless Scaling Frame Generation + +This is a subproject of lsfg-vk and contains the external Vulkan logic for generating frames. + +The project is intentionally structured as a fully external project, such that it can be integrated into other applications. + +### Interface + +Interfacing with lsfg-vk-v3.1p is done via the `lsfg_3_1p.hpp` header. The internal Vulkan instance is created using `LSFG_3_1P::initialize()` and requires a specific deviceUUID, as well as parts of the lsfg-vk configuration, including a function loading SPIR-V shaders by name. Cleanup is done via `LSFG_3_1P::finalize()`, after which `LSFG_3_1P::initialize()` may be called again. Please note that the initialization process is expensive and may take a while. It is recommended to call this function once during the application's lifetime. + +Once the format and extent of the requested images are determined, `LSFG_3_1P::createContext()` should be called to initialize a frame generation context. The Vulkan images are created from backing memory, which is passed through the file descriptor arguments. A context can be destroyed using `LSFG_3_1P::deleteContext()`. + +Presenting the context can be done via `LSFG_3_1P::presentContext()`. Before calling the function a second time, make sure the outgoing semaphores have been signaled. 
diff --git a/lsfg-vk-v3.1p/include/context.hpp b/lsfg-vk-v3.1p/include/context.hpp new file mode 100644 index 0000000..3819f02 --- /dev/null +++ b/lsfg-vk-v3.1p/include/context.hpp @@ -0,0 +1,83 @@ +#pragma once + +#include "core/image.hpp" +#include "core/semaphore.hpp" +#include "core/fence.hpp" +#include "core/commandbuffer.hpp" +#include "shaders/alpha.hpp" +#include "shaders/beta.hpp" +#include "shaders/delta.hpp" +#include "shaders/gamma.hpp" +#include "shaders/generate.hpp" +#include "shaders/mipmaps.hpp" +#include "common/utils.hpp" + +#include + +#include +#include +#include + +namespace LSFG { + + class Context { + public: + /// + /// Create a context + /// + /// @param vk The Vulkan instance to use. + /// @param in0 File descriptor for the first input image. + /// @param in1 File descriptor for the second input image. + /// @param outN File descriptors for the output images. + /// @param extent The size of the images. + /// @param format The format of the images. + /// + /// @throws LSFG::vulkan_error if the context fails to initialize. + /// + Context(Vulkan& vk, + int in0, int in1, const std::vector& outN, + VkExtent2D extent, VkFormat format); + + /// + /// Present on the context. + /// + /// @param inSem Semaphore to wait on before starting the generation. + /// @param outSem Semaphores to signal after each generation is done. + /// + /// @throws LSFG::vulkan_error if the context fails to present. 
+ /// + void present(Vulkan& vk, + int inSem, const std::vector& outSem); + + // Trivially copyable, moveable and destructible + Context(const Context&) = default; + Context& operator=(const Context&) = default; + Context(Context&&) = default; + Context& operator=(Context&&) = default; + ~Context() = default; + private: + Core::Image inImg_0, inImg_1; // inImg_0 is next when fc % 2 == 0 + uint64_t frameIdx{0}; + + struct RenderData { + Core::Semaphore inSemaphore; // signaled when input is ready + std::vector internalSemaphores; // signaled when first step is done + std::vector outSemaphores; // signaled when each pass is done + std::vector completionFences; // fence for completion of each pass + + Core::CommandBuffer cmdBuffer1; + std::vector cmdBuffers2; // command buffers for second step + + bool shouldWait{false}; + }; + std::array data; + + Shaders::Mipmaps mipmaps; + std::array alpha; + Shaders::Beta beta; + std::array gamma; + std::array delta; + Shaders::Generate generate; + }; + +} diff --git a/lsfg-vk-v3.1p/include/shaders/alpha.hpp b/lsfg-vk-v3.1p/include/shaders/alpha.hpp new file mode 100644 index 0000000..c837264 --- /dev/null +++ b/lsfg-vk-v3.1p/include/shaders/alpha.hpp @@ -0,0 +1,60 @@ +#pragma once + +#include "core/commandbuffer.hpp" +#include "core/descriptorset.hpp" +#include "core/image.hpp" +#include "core/pipeline.hpp" +#include "core/sampler.hpp" +#include "core/shadermodule.hpp" +#include "common/utils.hpp" + +#include +#include + +namespace LSFG::Shaders { + + /// + /// Alpha shader. + /// + class Alpha { + public: + Alpha() = default; + + /// + /// Initialize the shaderchain. + /// + /// @param inImg One mipmap level + /// + /// @throws LSFG::vulkan_error if resource creation fails. + /// + Alpha(Vulkan& vk, Core::Image inImg); + + /// + /// Dispatch the shaderchain. 
+ /// + void Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount); + + /// Get the output images + [[nodiscard]] const auto& getOutImages() const { return this->outImgs; } + + /// Trivially copyable, moveable and destructible + Alpha(const Alpha&) noexcept = default; + Alpha& operator=(const Alpha&) noexcept = default; + Alpha(Alpha&&) noexcept = default; + Alpha& operator=(Alpha&&) noexcept = default; + ~Alpha() = default; + private: + std::array shaderModules; + std::array pipelines; + Core::Sampler sampler; + std::array descriptorSets; + std::array lastDescriptorSet; + + Core::Image inImg; + std::array tempImgs1; + std::array tempImgs2; + std::array tempImgs3; + std::array, 3> outImgs; + }; + +} diff --git a/lsfg-vk-v3.1p/include/shaders/beta.hpp b/lsfg-vk-v3.1p/include/shaders/beta.hpp new file mode 100644 index 0000000..6921673 --- /dev/null +++ b/lsfg-vk-v3.1p/include/shaders/beta.hpp @@ -0,0 +1,61 @@ +#pragma once + +#include "core/buffer.hpp" +#include "core/commandbuffer.hpp" +#include "core/descriptorset.hpp" +#include "core/image.hpp" +#include "core/pipeline.hpp" +#include "core/sampler.hpp" +#include "core/shadermodule.hpp" +#include "common/utils.hpp" + +#include +#include + +namespace LSFG::Shaders { + + /// + /// Beta shader. + /// + class Beta { + public: + Beta() = default; + + /// + /// Initialize the shaderchain. + /// + /// @param inImgs Three sets of four RGBA images, corresponding to a frame count % 3. + /// + /// @throws LSFG::vulkan_error if resource creation fails. + /// + Beta(Vulkan& vk, std::array, 3> inImgs); + + /// + /// Dispatch the shaderchain. 
+ /// + void Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount); + + /// Get the output images + [[nodiscard]] const auto& getOutImages() const { return this->outImgs; } + + /// Trivially copyable, moveable and destructible + Beta(const Beta&) noexcept = default; + Beta& operator=(const Beta&) noexcept = default; + Beta(Beta&&) noexcept = default; + Beta& operator=(Beta&&) noexcept = default; + ~Beta() = default; + private: + std::array shaderModules; + std::array pipelines; + std::array samplers; + Core::Buffer buffer; + std::array firstDescriptorSet; + std::array descriptorSets; + + std::array, 3> inImgs; + std::array tempImgs1; + std::array tempImgs2; + std::array outImgs; + }; + +} diff --git a/lsfg-vk-v3.1p/include/shaders/delta.hpp b/lsfg-vk-v3.1p/include/shaders/delta.hpp new file mode 100644 index 0000000..52a64d8 --- /dev/null +++ b/lsfg-vk-v3.1p/include/shaders/delta.hpp @@ -0,0 +1,79 @@ +#pragma once + +#include "core/buffer.hpp" +#include "core/commandbuffer.hpp" +#include "core/descriptorset.hpp" +#include "core/image.hpp" +#include "core/pipeline.hpp" +#include "core/sampler.hpp" +#include "core/shadermodule.hpp" +#include "common/utils.hpp" + +#include +#include +#include +#include + +namespace LSFG::Shaders { + + /// + /// Delta shader. + /// + class Delta { + public: + Delta() = default; + + /// + /// Initialize the shaderchain. + /// + /// @param inImgs1 Three sets of four RGBA images, corresponding to a frame count % 3. + /// @param inImg2 Second Input image + /// @param optImg1 Optional image for non-first passes. + /// @param optImg2 Second optional image for non-first passes. + /// @param optImg3 Third optional image for non-first passes. + /// + /// @throws LSFG::vulkan_error if resource creation fails. + /// + Delta(Vulkan& vk, std::array, 3> inImgs1, + Core::Image inImg2, + std::optional optImg1, + std::optional optImg2, + std::optional optImg3); + + /// + /// Dispatch the shaderchain. 
+ /// + void Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount, uint64_t pass_idx); + + /// Get the first output image + [[nodiscard]] const auto& getOutImage1() const { return this->outImg1; } + /// Get the second output image + [[nodiscard]] const auto& getOutImage2() const { return this->outImg2; } + + /// Trivially copyable, moveable and destructible + Delta(const Delta&) noexcept = default; + Delta& operator=(const Delta&) noexcept = default; + Delta(Delta&&) noexcept = default; + Delta& operator=(Delta&&) noexcept = default; + ~Delta() = default; + private: + std::array shaderModules; + std::array pipelines; + std::array samplers; + struct DeltaPass { + Core::Buffer buffer; + std::array firstDescriptorSet; + std::array descriptorSets; + std::array sixthDescriptorSet; + }; + std::vector passes; + + std::array, 3> inImgs1; + Core::Image inImg2; + std::optional optImg1, optImg2, optImg3; + std::array tempImgs1; + std::array tempImgs2; + Core::Image outImg1, outImg2; + }; + +} diff --git a/lsfg-vk-v3.1p/include/shaders/gamma.hpp b/lsfg-vk-v3.1p/include/shaders/gamma.hpp new file mode 100644 index 0000000..6298bb3 --- /dev/null +++ b/lsfg-vk-v3.1p/include/shaders/gamma.hpp @@ -0,0 +1,71 @@ +#pragma once + +#include "core/buffer.hpp" +#include "core/commandbuffer.hpp" +#include "core/descriptorset.hpp" +#include "core/image.hpp" +#include "core/pipeline.hpp" +#include "core/sampler.hpp" +#include "core/shadermodule.hpp" +#include "common/utils.hpp" + +#include +#include +#include +#include + +namespace LSFG::Shaders { + + /// + /// Gamma shader. + /// + class Gamma { + public: + Gamma() = default; + + /// + /// Initialize the shaderchain. + /// + /// @param inImgs1 Three sets of four RGBA images, corresponding to a frame count % 3. + /// @param inImg2 Second Input image + /// @param optImg Optional image for non-first passes. + /// + /// @throws LSFG::vulkan_error if resource creation fails. 
+ /// + Gamma(Vulkan& vk, std::array, 3> inImgs1, + Core::Image inImg2, std::optional optImg); + + /// + /// Dispatch the shaderchain. + /// + void Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount, uint64_t pass_idx); + + /// Get the output image + [[nodiscard]] const auto& getOutImage() const { return this->outImg; } + + /// Trivially copyable, moveable and destructible + Gamma(const Gamma&) noexcept = default; + Gamma& operator=(const Gamma&) noexcept = default; + Gamma(Gamma&&) noexcept = default; + Gamma& operator=(Gamma&&) noexcept = default; + ~Gamma() = default; + private: + std::array shaderModules; + std::array pipelines; + std::array samplers; + struct GammaPass { + Core::Buffer buffer; + std::array firstDescriptorSet; + std::array descriptorSets; + }; + std::vector passes; + + std::array, 3> inImgs1; + Core::Image inImg2; + std::optional optImg; + std::array tempImgs1; + std::array tempImgs2; + Core::Image outImg; + }; + +} diff --git a/lsfg-vk-v3.1p/include/shaders/generate.hpp b/lsfg-vk-v3.1p/include/shaders/generate.hpp new file mode 100644 index 0000000..cf8d5f4 --- /dev/null +++ b/lsfg-vk-v3.1p/include/shaders/generate.hpp @@ -0,0 +1,70 @@ +#pragma once + +#include "core/buffer.hpp" +#include "core/commandbuffer.hpp" +#include "core/descriptorset.hpp" +#include "core/image.hpp" +#include "core/pipeline.hpp" +#include "core/sampler.hpp" +#include "core/shadermodule.hpp" +#include "common/utils.hpp" + +#include + +#include +#include +#include + +namespace LSFG::Shaders { + + /// + /// Generate shader. + /// + class Generate { + public: + Generate() = default; + + /// + /// Initialize the shaderchain. + /// + /// @param inImg1 Input image 1. + /// @param inImg2 Input image 2. + /// @param inImg3 Input image 3. + /// @param inImg4 Input image 4. + /// @param inImg5 Input image 5. + /// @param fds File descriptors for the output images. + /// + /// @throws LSFG::vulkan_error if resource creation fails. 
+ /// + Generate(Vulkan& vk, + Core::Image inImg1, Core::Image inImg2, + Core::Image inImg3, Core::Image inImg4, Core::Image inImg5, + const std::vector& fds, VkFormat format); + + /// + /// Dispatch the shaderchain. + /// + void Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount, uint64_t pass_idx); + + /// Trivially copyable, moveable and destructible + Generate(const Generate&) noexcept = default; + Generate& operator=(const Generate&) noexcept = default; + Generate(Generate&&) noexcept = default; + Generate& operator=(Generate&&) noexcept = default; + ~Generate() = default; + private: + Core::ShaderModule shaderModule; + Core::Pipeline pipeline; + std::array samplers; + struct GeneratePass { + Core::Buffer buffer; + std::array descriptorSet; + }; + std::vector passes; + + Core::Image inImg1, inImg2; + Core::Image inImg3, inImg4, inImg5; + std::vector outImgs; + }; + +} diff --git a/lsfg-vk-v3.1p/include/shaders/mipmaps.hpp b/lsfg-vk-v3.1p/include/shaders/mipmaps.hpp new file mode 100644 index 0000000..cdffed7 --- /dev/null +++ b/lsfg-vk-v3.1p/include/shaders/mipmaps.hpp @@ -0,0 +1,59 @@ +#pragma once + +#include "core/buffer.hpp" +#include "core/commandbuffer.hpp" +#include "core/descriptorset.hpp" +#include "core/image.hpp" +#include "core/pipeline.hpp" +#include "core/sampler.hpp" +#include "core/shadermodule.hpp" +#include "common/utils.hpp" + +#include +#include + +namespace LSFG::Shaders { + + /// + /// Mipmaps shader. + /// + class Mipmaps { + public: + Mipmaps() = default; + + /// + /// Initialize the shaderchain. + /// + /// @param inImg_0 The next frame (when fc % 2 == 0) + /// @param inImg_1 The next frame (when fc % 2 == 1) + /// + /// @throws LSFG::vulkan_error if resource creation fails. + /// + Mipmaps(Vulkan& vk, Core::Image inImg_0, Core::Image inImg_1); + + /// + /// Dispatch the shaderchain. + /// + void Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount); + + /// Get the output images. 
+ [[nodiscard]] const auto& getOutImages() const { return this->outImgs; } + + /// Trivially copyable, moveable and destructible + Mipmaps(const Mipmaps&) noexcept = default; + Mipmaps& operator=(const Mipmaps&) noexcept = default; + Mipmaps(Mipmaps&&) noexcept = default; + Mipmaps& operator=(Mipmaps&&) noexcept = default; + ~Mipmaps() = default; + private: + Core::ShaderModule shaderModule; + Core::Pipeline pipeline; + Core::Buffer buffer; + Core::Sampler sampler; + std::array descriptorSets; + + Core::Image inImg_0, inImg_1; + std::array outImgs; + }; + +} diff --git a/lsfg-vk-v3.1p/public/lsfg_3_1p.hpp b/lsfg-vk-v3.1p/public/lsfg_3_1p.hpp new file mode 100644 index 0000000..27417ce --- /dev/null +++ b/lsfg-vk-v3.1p/public/lsfg_3_1p.hpp @@ -0,0 +1,66 @@ +#pragma once + +#include + +#include +#include +#include +#include + +namespace LSFG_3_1P { + + /// + /// Initialize the LSFG library. + /// + /// @param deviceUUID The UUID of the Vulkan device to use. + /// @param isHdr Whether the images are in HDR format. + /// @param flowScale Internal flow scale factor. + /// @param generationCount Number of frames to generate. + /// @param loader Function to load shader source code by name. + /// + /// @throws LSFG::vulkan_error if Vulkan objects fail to initialize. + /// + void initialize(uint64_t deviceUUID, + bool isHdr, float flowScale, uint64_t generationCount, + const std::function(const std::string&)>& loader); + + /// + /// Create a new LSFG context on a swapchain. + /// + /// @param in0 File descriptor for the first input image. + /// @param in1 File descriptor for the second input image. + /// @param outN File descriptor for each output image. This defines the LSFG level. + /// @param extent The size of the images + /// @param format The format of the images. + /// @return A unique identifier for the created context. + /// + /// @throws LSFG::vulkan_error if the context cannot be created. 
+ /// + int32_t createContext( + int in0, int in1, const std::vector& outN, + VkExtent2D extent, VkFormat format); + + /// + /// Present a context. + /// + /// @param id Unique identifier of the context to present. + /// @param inSem Semaphore to wait on before starting the generation. + /// @param outSem Semaphores to signal once each output image is ready. + /// + /// @throws LSFG::vulkan_error if the context cannot be presented. + /// + void presentContext(int32_t id, int inSem, const std::vector& outSem); + + /// + /// Delete an LSFG context. + /// + /// @param id Unique identifier of the context to delete. + /// + void deleteContext(int32_t id); + + /// + /// Deinitialize the LSFG library. + /// + void finalize(); + +} diff --git a/lsfg-vk-v3.1p/src/context.cpp b/lsfg-vk-v3.1p/src/context.cpp new file mode 100644 index 0000000..0e45687 --- /dev/null +++ b/lsfg-vk-v3.1p/src/context.cpp @@ -0,0 +1,121 @@ +#include "context.hpp" +#include "common/utils.hpp" +#include "common/exception.hpp" + +#include + +#include +#include +#include +#include +#include + +using namespace LSFG; + +Context::Context(Vulkan& vk, + int in0, int in1, const std::vector& outN, + VkExtent2D extent, VkFormat format) { + // import input images + this->inImg_0 = Core::Image(vk.device, extent, format, + VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + VK_IMAGE_ASPECT_COLOR_BIT, in0); + this->inImg_1 = Core::Image(vk.device, extent, format, + VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + VK_IMAGE_ASPECT_COLOR_BIT, in1); + + // prepare render data + for (size_t i = 0; i < 8; i++) { + auto& data = this->data.at(i); + data.internalSemaphores.resize(vk.generationCount); + data.outSemaphores.resize(vk.generationCount); + data.completionFences.resize(vk.generationCount); + data.cmdBuffers2.resize(vk.generationCount); + } + + // create shader chains + this->mipmaps = Shaders::Mipmaps(vk, this->inImg_0, this->inImg_1); + for (size_t i = 0; i < 7; i++) + this->alpha.at(i) = 
Shaders::Alpha(vk, this->mipmaps.getOutImages().at(i)); + this->beta = Shaders::Beta(vk, this->alpha.at(0).getOutImages()); + for (size_t i = 0; i < 7; i++) { + this->gamma.at(i) = Shaders::Gamma(vk, + this->alpha.at(6 - i).getOutImages(), + this->beta.getOutImages().at(std::min(6 - i, 5)), + (i == 0) ? std::nullopt : std::make_optional(this->gamma.at(i - 1).getOutImage())); + if (i < 4) continue; + + this->delta.at(i - 4) = Shaders::Delta(vk, + this->alpha.at(6 - i).getOutImages(), + this->beta.getOutImages().at(6 - i), + (i == 4) ? std::nullopt : std::make_optional(this->gamma.at(i - 1).getOutImage()), + (i == 4) ? std::nullopt : std::make_optional(this->delta.at(i - 5).getOutImage1()), + (i == 4) ? std::nullopt : std::make_optional(this->delta.at(i - 5).getOutImage2())); + } + this->generate = Shaders::Generate(vk, + this->inImg_0, this->inImg_1, + this->gamma.at(6).getOutImage(), + this->delta.at(2).getOutImage1(), + this->delta.at(2).getOutImage2(), + outN, format); +} + +void Context::present(Vulkan& vk, + int inSem, const std::vector& outSem) { + auto& data = this->data.at(this->frameIdx % 8); + + // 3. wait for completion of previous frame in this slot + if (data.shouldWait) + for (auto& fence : data.completionFences) + if (!fence.wait(vk.device, UINT64_MAX)) + throw LSFG::vulkan_error(VK_TIMEOUT, "Fence wait timed out"); + data.shouldWait = true; + + // 1. 
create mipmaps and process input image + if (inSem >= 0) data.inSemaphore = Core::Semaphore(vk.device, inSem); + for (size_t i = 0; i < vk.generationCount; i++) + data.internalSemaphores.at(i) = Core::Semaphore(vk.device); + + data.cmdBuffer1 = Core::CommandBuffer(vk.device, vk.commandPool); + data.cmdBuffer1.begin(); + + this->mipmaps.Dispatch(data.cmdBuffer1, this->frameIdx); + for (size_t i = 0; i < 7; i++) + this->alpha.at(6 - i).Dispatch(data.cmdBuffer1, this->frameIdx); + this->beta.Dispatch(data.cmdBuffer1, this->frameIdx); + + data.cmdBuffer1.end(); + std::vector waits = { data.inSemaphore }; + if (inSem < 0) waits.clear(); + data.cmdBuffer1.submit(vk.device.getComputeQueue(), std::nullopt, + waits, std::nullopt, + data.internalSemaphores, std::nullopt); + + // 2. generate intermediary frames + for (size_t pass = 0; pass < vk.generationCount; pass++) { + auto& internalSemaphore = data.internalSemaphores.at(pass); + auto& outSemaphore = data.outSemaphores.at(pass); + if (inSem >= 0) outSemaphore = Core::Semaphore(vk.device, outSem.empty() ? 
-1 : outSem.at(pass)); + auto& completionFence = data.completionFences.at(pass); + completionFence = Core::Fence(vk.device); + + auto& buf2 = data.cmdBuffers2.at(pass); + buf2 = Core::CommandBuffer(vk.device, vk.commandPool); + buf2.begin(); + + for (size_t i = 0; i < 7; i++) { + this->gamma.at(i).Dispatch(buf2, this->frameIdx, pass); + if (i >= 4) + this->delta.at(i - 4).Dispatch(buf2, this->frameIdx, pass); + } + this->generate.Dispatch(buf2, this->frameIdx, pass); + + buf2.end(); + std::vector signals = { outSemaphore }; + if (inSem < 0) signals.clear(); + buf2.submit(vk.device.getComputeQueue(), completionFence, + { internalSemaphore }, std::nullopt, + signals, std::nullopt); + } + + this->frameIdx++; +} diff --git a/lsfg-vk-v3.1p/src/lsfg.cpp b/lsfg-vk-v3.1p/src/lsfg.cpp new file mode 100644 index 0000000..276385a --- /dev/null +++ b/lsfg-vk-v3.1p/src/lsfg.cpp @@ -0,0 +1,96 @@ +#include "lsfg_3_1p.hpp" +#include "context.hpp" +#include "core/commandpool.hpp" +#include "core/descriptorpool.hpp" +#include "core/instance.hpp" +#include "pool/shaderpool.hpp" +#include "common/exception.hpp" +#include "common/utils.hpp" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace LSFG; +using namespace LSFG_3_1P; + +namespace { + std::optional instance; + std::optional device; + std::unordered_map contexts; +} + +void LSFG_3_1P::initialize(uint64_t deviceUUID, + bool isHdr, float flowScale, uint64_t generationCount, + const std::function(const std::string&)>& loader) { + if (instance.has_value() || device.has_value()) + return; + + instance.emplace(); + device.emplace(Vulkan { + .device{*instance, deviceUUID}, + .generationCount = generationCount, + .flowScale = flowScale, + .isHdr = isHdr + }); + contexts = std::unordered_map(); + + device->commandPool = Core::CommandPool(device->device); + device->descriptorPool = Core::DescriptorPool(device->device); + + device->resources = Pool::ResourcePool(device->isHdr, 
device->flowScale); + device->shaders = Pool::ShaderPool(loader); + + std::srand(static_cast(std::time(nullptr))); +} + +int32_t LSFG_3_1P::createContext( + int in0, int in1, const std::vector& outN, + VkExtent2D extent, VkFormat format) { + if (!instance.has_value() || !device.has_value()) + throw LSFG::vulkan_error(VK_ERROR_INITIALIZATION_FAILED, "LSFG not initialized"); + + const int32_t id = std::rand(); + contexts.emplace(id, Context(*device, in0, in1, outN, extent, format)); + return id; +} + +void LSFG_3_1P::presentContext(int32_t id, int inSem, const std::vector& outSem) { + if (!instance.has_value() || !device.has_value()) + throw LSFG::vulkan_error(VK_ERROR_INITIALIZATION_FAILED, "LSFG not initialized"); + + auto it = contexts.find(id); + if (it == contexts.end()) + throw LSFG::vulkan_error(VK_ERROR_UNKNOWN, "Context not found"); + + it->second.present(*device, inSem, outSem); +} + +void LSFG_3_1P::deleteContext(int32_t id) { + if (!instance.has_value() || !device.has_value()) + throw LSFG::vulkan_error(VK_ERROR_INITIALIZATION_FAILED, "LSFG not initialized"); + + auto it = contexts.find(id); + if (it == contexts.end()) + throw LSFG::vulkan_error(VK_ERROR_DEVICE_LOST, "No such context"); + + vkDeviceWaitIdle(device->device.handle()); + contexts.erase(it); +} + +void LSFG_3_1P::finalize() { + if (!instance.has_value() || !device.has_value()) + return; + + vkDeviceWaitIdle(device->device.handle()); + contexts.clear(); + device.reset(); + instance.reset(); +} diff --git a/lsfg-vk-v3.1p/src/shaders/alpha.cpp b/lsfg-vk-v3.1p/src/shaders/alpha.cpp new file mode 100644 index 0000000..79c555c --- /dev/null +++ b/lsfg-vk-v3.1p/src/shaders/alpha.cpp @@ -0,0 +1,139 @@ +#include "shaders/alpha.hpp" +#include "common/utils.hpp" +#include "core/commandbuffer.hpp" +#include "core/image.hpp" + +#include + +#include +#include +#include + +using namespace LSFG::Shaders; + +Alpha::Alpha(Vulkan& vk, Core::Image inImg) : inImg(std::move(inImg)) { + // create resources + 
this->shaderModules = {{ + vk.shaders.getShader(vk.device, "alpha[0]", + { { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }), + vk.shaders.getShader(vk.device, "alpha[1]", + { { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }), + vk.shaders.getShader(vk.device, "alpha[2]", + { { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }), + vk.shaders.getShader(vk.device, "alpha[3]", + { { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }) + }}; + this->pipelines = {{ + vk.shaders.getPipeline(vk.device, "alpha[0]"), + vk.shaders.getPipeline(vk.device, "alpha[1]"), + vk.shaders.getPipeline(vk.device, "alpha[2]"), + vk.shaders.getPipeline(vk.device, "alpha[3]") + }}; + this->sampler = vk.resources.getSampler(vk.device); + for (size_t i = 0; i < 3; i++) + this->descriptorSets.at(i) = Core::DescriptorSet(vk.device, vk.descriptorPool, this->shaderModules.at(i)); + for (size_t i = 0; i < 3; i++) + this->lastDescriptorSet.at(i) = Core::DescriptorSet(vk.device, vk.descriptorPool, this->shaderModules.at(3)); + + // create internal images/outputs + const VkExtent2D extent = this->inImg.getExtent(); + const VkExtent2D halfExtent = { + .width = (extent.width + 1) >> 1, + .height = (extent.height + 1) >> 1 + }; + for (size_t i = 0; i < 2; i++) { + this->tempImgs1.at(i) = Core::Image(vk.device, halfExtent); + this->tempImgs2.at(i) = Core::Image(vk.device, halfExtent); + } + + const VkExtent2D quarterExtent = { + .width = (halfExtent.width + 1) >> 1, + .height = (halfExtent.height + 1) >> 1 + }; + for (size_t i = 0; i < 4; i++) { + this->tempImgs3.at(i) = Core::Image(vk.device, quarterExtent); + for (size_t j = 0; j < 3; j++) + this->outImgs.at(j).at(i) = Core::Image(vk.device, quarterExtent); + } + + // hook 
up shaders + this->descriptorSets.at(0).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->sampler) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1) + .build(); + this->descriptorSets.at(1).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->sampler) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2) + .build(); + this->descriptorSets.at(2).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->sampler) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs3) + .build(); + for (size_t i = 0; i < 3; i++) + this->lastDescriptorSet.at(i).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->sampler) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs3) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs.at(i)) + .build(); +} + +void Alpha::Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount) { + // first pass + const auto halfExtent = this->tempImgs1.at(0).getExtent(); + uint32_t threadsX = (halfExtent.width + 7) >> 3; + uint32_t threadsY = (halfExtent.height + 7) >> 3; + + Utils::BarrierBuilder(buf) + .addW2R(this->inImg) + .addR2W(this->tempImgs1) + .build(); + + this->pipelines.at(0).bind(buf); + this->descriptorSets.at(0).bind(buf, this->pipelines.at(0)); + buf.dispatch(threadsX, threadsY, 1); + + // second pass + Utils::BarrierBuilder(buf) + .addW2R(this->tempImgs1) + .addR2W(this->tempImgs2) + .build(); + + this->pipelines.at(1).bind(buf); + this->descriptorSets.at(1).bind(buf, this->pipelines.at(1)); + buf.dispatch(threadsX, threadsY, 1); + + // third pass + const auto quarterExtent = this->tempImgs3.at(0).getExtent(); + threadsX = (quarterExtent.width + 7) >> 3; + threadsY = (quarterExtent.height + 7) >> 3; + + Utils::BarrierBuilder(buf) + .addW2R(this->tempImgs2) + .addR2W(this->tempImgs3) + .build(); + + 
this->pipelines.at(2).bind(buf); + this->descriptorSets.at(2).bind(buf, this->pipelines.at(2)); + buf.dispatch(threadsX, threadsY, 1); + + // fourth pass + Utils::BarrierBuilder(buf) + .addW2R(this->tempImgs3) + .addR2W(this->outImgs.at(frameCount % 3)) + .build(); + + this->pipelines.at(3).bind(buf); + this->lastDescriptorSet.at(frameCount % 3).bind(buf, this->pipelines.at(3)); + buf.dispatch(threadsX, threadsY, 1); +} diff --git a/lsfg-vk-v3.1p/src/shaders/beta.cpp b/lsfg-vk-v3.1p/src/shaders/beta.cpp new file mode 100644 index 0000000..64b3fd3 --- /dev/null +++ b/lsfg-vk-v3.1p/src/shaders/beta.cpp @@ -0,0 +1,161 @@ +#include "shaders/beta.hpp" +#include "common/utils.hpp" +#include "core/commandbuffer.hpp" +#include "core/image.hpp" + +#include + +#include +#include +#include +#include + +using namespace LSFG::Shaders; + +Beta::Beta(Vulkan& vk, std::array, 3> inImgs) + : inImgs(std::move(inImgs)) { + // create resources + this->shaderModules = {{ + vk.shaders.getShader(vk.device, "beta[0]", + { { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 12, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }), + vk.shaders.getShader(vk.device, "beta[1]", + { { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }), + vk.shaders.getShader(vk.device, "beta[2]", + { { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }), + vk.shaders.getShader(vk.device, "beta[3]", + { { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }), + vk.shaders.getShader(vk.device, "beta[4]", + { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER }, + { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 6, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }) + }}; + this->pipelines = {{ + vk.shaders.getPipeline(vk.device, "beta[0]"), + vk.shaders.getPipeline(vk.device, "beta[1]"), + 
vk.shaders.getPipeline(vk.device, "beta[2]"), + vk.shaders.getPipeline(vk.device, "beta[3]"), + vk.shaders.getPipeline(vk.device, "beta[4]") + }}; + this->samplers.at(0) = vk.resources.getSampler(vk.device); + this->samplers.at(1) = vk.resources.getSampler(vk.device, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, VK_COMPARE_OP_NEVER, true); + for (size_t i = 0; i < 3; i++) + this->firstDescriptorSet.at(i) = Core::DescriptorSet(vk.device, vk.descriptorPool, this->shaderModules.at(0)); + for (size_t i = 0; i < 4; i++) + this->descriptorSets.at(i) = Core::DescriptorSet(vk.device, vk.descriptorPool, this->shaderModules.at(i + 1)); + this->buffer = vk.resources.getBuffer(vk.device, 0.5F); + + // create internal images/outputs + const VkExtent2D extent = this->inImgs.at(0).at(0).getExtent(); + for (size_t i = 0; i < 2; i++) { + this->tempImgs1.at(i) = Core::Image(vk.device, extent); + this->tempImgs2.at(i) = Core::Image(vk.device, extent); + } + + for (size_t i = 0; i < 6; i++) + this->outImgs.at(i) = Core::Image(vk.device, + { extent.width >> i, extent.height >> i }, + VK_FORMAT_R8_UNORM); + + // hook up shaders + for (size_t i = 0; i < 3; i++) { + this->firstDescriptorSet.at(i).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(1)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs.at((i + 1) % 3)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs.at((i + 2) % 3)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs.at(i % 3)) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1) + .build(); + } + this->descriptorSets.at(0).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2) + .build(); + this->descriptorSets.at(1).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1) + 
.build(); + this->descriptorSets.at(2).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2) + .build(); + this->descriptorSets.at(3).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, this->buffer) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs) + .build(); +} + +void Beta::Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount) { + // first pass + const auto extent = this->tempImgs1.at(0).getExtent(); + uint32_t threadsX = (extent.width + 7) >> 3; + uint32_t threadsY = (extent.height + 7) >> 3; + + Utils::BarrierBuilder(buf) + .addW2R(this->inImgs.at(0)) + .addW2R(this->inImgs.at(1)) + .addW2R(this->inImgs.at(2)) + .addR2W(this->tempImgs1) + .build(); + + this->pipelines.at(0).bind(buf); + this->firstDescriptorSet.at(frameCount % 3).bind(buf, this->pipelines.at(0)); + buf.dispatch(threadsX, threadsY, 1); + + // second pass + Utils::BarrierBuilder(buf) + .addW2R(this->tempImgs1) + .addR2W(this->tempImgs2) + .build(); + + this->pipelines.at(1).bind(buf); + this->descriptorSets.at(0).bind(buf, this->pipelines.at(1)); + buf.dispatch(threadsX, threadsY, 1); + + // third pass + Utils::BarrierBuilder(buf) + .addW2R(this->tempImgs2) + .addR2W(this->tempImgs1) + .build(); + + this->pipelines.at(2).bind(buf); + this->descriptorSets.at(1).bind(buf, this->pipelines.at(2)); + buf.dispatch(threadsX, threadsY, 1); + + // fourth pass + Utils::BarrierBuilder(buf) + .addW2R(this->tempImgs1) + .addR2W(this->tempImgs2) + .build(); + + this->pipelines.at(3).bind(buf); + this->descriptorSets.at(2).bind(buf, this->pipelines.at(3)); + buf.dispatch(threadsX, threadsY, 1); + + // fifth pass + threadsX = (extent.width + 31) >> 5; + threadsY = (extent.height + 31) >> 5; + + Utils::BarrierBuilder(buf) + 
.addW2R(this->tempImgs2) + .addR2W(this->outImgs) + .build(); + + this->pipelines.at(4).bind(buf); + this->descriptorSets.at(3).bind(buf, this->pipelines.at(4)); + buf.dispatch(threadsX, threadsY, 1); +} diff --git a/lsfg-vk-v3.1p/src/shaders/delta.cpp b/lsfg-vk-v3.1p/src/shaders/delta.cpp new file mode 100644 index 0000000..8c05f20 --- /dev/null +++ b/lsfg-vk-v3.1p/src/shaders/delta.cpp @@ -0,0 +1,340 @@ +#include "shaders/delta.hpp" +#include "common/utils.hpp" +#include "core/commandbuffer.hpp" +#include "core/image.hpp" + +#include + +#include +#include +#include +#include +#include + +using namespace LSFG::Shaders; + +Delta::Delta(Vulkan& vk, std::array, 3> inImgs1, + Core::Image inImg2, + std::optional optImg1, + std::optional optImg2, + std::optional optImg3) + : inImgs1(std::move(inImgs1)), inImg2(std::move(inImg2)), + optImg1(std::move(optImg1)), optImg2(std::move(optImg2)), + optImg3(std::move(optImg3)) { + // create resources + this->shaderModules = {{ + vk.shaders.getShader(vk.device, "delta[0]", + { { 1 , VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER }, + { 2, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 9, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 3, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }), + vk.shaders.getShader(vk.device, "delta[1]", + { { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 3, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }), + vk.shaders.getShader(vk.device, "delta[2]", + { { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }), + vk.shaders.getShader(vk.device, "delta[3]", + { { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }), + vk.shaders.getShader(vk.device, "delta[4]", + { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER }, + { 2, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 6, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }), + vk.shaders.getShader(vk.device, "delta[5]", + { { 1, 
VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER }, + { 2, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 10, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }), + vk.shaders.getShader(vk.device, "delta[6]", + { { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }), + vk.shaders.getShader(vk.device, "delta[7]", + { { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }), + vk.shaders.getShader(vk.device, "delta[8]", + { { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }), + vk.shaders.getShader(vk.device, "delta[9]", + { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER }, + { 2, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 3, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }) + }}; + this->pipelines = {{ + vk.shaders.getPipeline(vk.device, "delta[0]"), + vk.shaders.getPipeline(vk.device, "delta[1]"), + vk.shaders.getPipeline(vk.device, "delta[2]"), + vk.shaders.getPipeline(vk.device, "delta[3]"), + vk.shaders.getPipeline(vk.device, "delta[4]"), + vk.shaders.getPipeline(vk.device, "delta[5]"), + vk.shaders.getPipeline(vk.device, "delta[6]"), + vk.shaders.getPipeline(vk.device, "delta[7]"), + vk.shaders.getPipeline(vk.device, "delta[8]"), + vk.shaders.getPipeline(vk.device, "delta[9]") + }}; + this->samplers.at(0) = vk.resources.getSampler(vk.device); + this->samplers.at(1) = vk.resources.getSampler(vk.device, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, VK_COMPARE_OP_NEVER, true); + this->samplers.at(2) = vk.resources.getSampler(vk.device, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_COMPARE_OP_ALWAYS, false); + + // create internal images/outputs + const VkExtent2D extent = this->inImgs1.at(0).at(0).getExtent(); + for (size_t i = 0; i < 4; i++) { + this->tempImgs1.at(i) = Core::Image(vk.device, extent); + this->tempImgs2.at(i) = Core::Image(vk.device, 
extent); + } + + this->outImg1 = Core::Image(vk.device, + { extent.width, extent.height }, + VK_FORMAT_R16G16B16A16_SFLOAT); + this->outImg2 = Core::Image(vk.device, + { extent.width, extent.height }, + VK_FORMAT_R16G16B16A16_SFLOAT); + + // hook up shaders + for (size_t pass_idx = 0; pass_idx < vk.generationCount; pass_idx++) { + auto& pass = this->passes.emplace_back(); + pass.buffer = vk.resources.getBuffer(vk.device, + static_cast(pass_idx + 1) / static_cast(vk.generationCount + 1), + false, !this->optImg1.has_value()); + for (size_t i = 0; i < 3; i++) { + pass.firstDescriptorSet.at(i) = Core::DescriptorSet(vk.device, vk.descriptorPool, + this->shaderModules.at(0)); + pass.firstDescriptorSet.at(i).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, pass.buffer) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(1)) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(2)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs1.at((i + 2) % 3)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs1.at(i % 3)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg1) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(0)) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(1)) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(2)) + .build(); + } + pass.descriptorSets.at(0) = Core::DescriptorSet(vk.device, vk.descriptorPool, + this->shaderModules.at(1)); + pass.descriptorSets.at(0).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(0)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(1)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(2)) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2) + .build(); + pass.descriptorSets.at(1) = Core::DescriptorSet(vk.device, vk.descriptorPool, + this->shaderModules.at(2)); + pass.descriptorSets.at(1).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0)) + 
.add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1) + .build(); + pass.descriptorSets.at(2) = Core::DescriptorSet(vk.device, vk.descriptorPool, + this->shaderModules.at(3)); + pass.descriptorSets.at(2).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2) + .build(); + pass.descriptorSets.at(3) = Core::DescriptorSet(vk.device, vk.descriptorPool, + this->shaderModules.at(4)); + pass.descriptorSets.at(3).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, pass.buffer) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0)) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(2)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg1) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg2) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImg1) + .build(); + for (size_t i = 0; i < 3; i++) { + pass.sixthDescriptorSet.at(i) = Core::DescriptorSet(vk.device, vk.descriptorPool, + this->shaderModules.at(5)); + pass.sixthDescriptorSet.at(i).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, pass.buffer) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(1)) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(2)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs1.at((i + 2) % 3)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs1.at(i % 3)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg1) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg2) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2.at(0)) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2.at(1)) + .build(); + } + pass.descriptorSets.at(4) = Core::DescriptorSet(vk.device, vk.descriptorPool, + this->shaderModules.at(6)); + pass.descriptorSets.at(4).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, 
this->samplers.at(0)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2.at(0)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2.at(1)) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(0)) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(1)) + .build(); + pass.descriptorSets.at(5) = Core::DescriptorSet(vk.device, vk.descriptorPool, + this->shaderModules.at(7)); + pass.descriptorSets.at(5).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(0)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(1)) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2.at(0)) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2.at(1)) + .build(); + pass.descriptorSets.at(6) = Core::DescriptorSet(vk.device, vk.descriptorPool, + this->shaderModules.at(8)); + pass.descriptorSets.at(6).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2.at(0)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2.at(1)) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(0)) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(1)) + .build(); + pass.descriptorSets.at(7) = Core::DescriptorSet(vk.device, vk.descriptorPool, + this->shaderModules.at(9)); + pass.descriptorSets.at(7).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, pass.buffer) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0)) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(2)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(0)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(1)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg3) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImg2) + .build(); + } +} + +void Delta::Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount, uint64_t pass_idx) { + auto& pass = this->passes.at(pass_idx); + + // first shader + 
const auto extent = this->tempImgs1.at(0).getExtent(); + const uint32_t threadsX = (extent.width + 7) >> 3; + const uint32_t threadsY = (extent.height + 7) >> 3; + + Utils::BarrierBuilder(buf) + .addW2R(this->inImgs1.at((frameCount + 2) % 3)) + .addW2R(this->inImgs1.at(frameCount % 3)) + .addW2R(this->optImg1) + .addR2W(this->tempImgs1.at(0)) + .addR2W(this->tempImgs1.at(1)) + .addR2W(this->tempImgs1.at(2)) + .build(); + + this->pipelines.at(0).bind(buf); + pass.firstDescriptorSet.at(frameCount % 3).bind(buf, this->pipelines.at(0)); + buf.dispatch(threadsX, threadsY, 1); + + // second shader + Utils::BarrierBuilder(buf) + .addW2R(this->tempImgs1.at(0)) + .addW2R(this->tempImgs1.at(1)) + .addW2R(this->tempImgs1.at(2)) + .addR2W(this->tempImgs2) + .build(); + + this->pipelines.at(1).bind(buf); + pass.descriptorSets.at(0).bind(buf, this->pipelines.at(1)); + buf.dispatch(threadsX, threadsY, 1); + + // third shader + Utils::BarrierBuilder(buf) + .addW2R(this->tempImgs2) + .addR2W(this->tempImgs1) + .build(); + + this->pipelines.at(2).bind(buf); + pass.descriptorSets.at(1).bind(buf, this->pipelines.at(2)); + buf.dispatch(threadsX, threadsY, 1); + + // fourth shader + Utils::BarrierBuilder(buf) + .addW2R(this->tempImgs1) + .addR2W(this->tempImgs2) + .build(); + + this->pipelines.at(3).bind(buf); + pass.descriptorSets.at(2).bind(buf, this->pipelines.at(3)); + buf.dispatch(threadsX, threadsY, 1); + + // fifth shader + Utils::BarrierBuilder(buf) + .addW2R(this->tempImgs2) + .addW2R(this->optImg1) + .addW2R(this->inImg2) + .addR2W(this->outImg1) + .build(); + + this->pipelines.at(4).bind(buf); + pass.descriptorSets.at(3).bind(buf, this->pipelines.at(4)); + buf.dispatch(threadsX, threadsY, 1); + + // sixth shader + Utils::BarrierBuilder(buf) + .addW2R(this->inImgs1.at((frameCount + 2) % 3)) + .addW2R(this->inImgs1.at(frameCount % 3)) + .addW2R(this->optImg1) + .addW2R(this->optImg2) + .addR2W(this->tempImgs2.at(0)) + .addR2W(this->tempImgs2.at(1)) + .build(); + + 
this->pipelines.at(5).bind(buf); + pass.sixthDescriptorSet.at(frameCount % 3).bind(buf, this->pipelines.at(5)); + buf.dispatch(threadsX, threadsY, 1); + + // seventh shader + Utils::BarrierBuilder(buf) + .addW2R(this->tempImgs2.at(0)) + .addW2R(this->tempImgs2.at(1)) + .addR2W(this->tempImgs1.at(0)) + .addR2W(this->tempImgs1.at(1)) + .build(); + + this->pipelines.at(6).bind(buf); + pass.descriptorSets.at(4).bind(buf, this->pipelines.at(6)); + buf.dispatch(threadsX, threadsY, 1); + + // eighth shader + Utils::BarrierBuilder(buf) + .addW2R(this->tempImgs1.at(0)) + .addW2R(this->tempImgs1.at(1)) + .addR2W(this->tempImgs2.at(0)) + .addR2W(this->tempImgs2.at(1)) + .build(); + this->pipelines.at(7).bind(buf); + pass.descriptorSets.at(5).bind(buf, this->pipelines.at(7)); + buf.dispatch(threadsX, threadsY, 1); + + // ninth shader + Utils::BarrierBuilder(buf) + .addW2R(this->tempImgs2.at(0)) + .addW2R(this->tempImgs2.at(1)) + .addW2R(this->optImg3) + .addR2W(this->tempImgs1.at(0)) + .addR2W(this->tempImgs1.at(1)) + .build(); + + this->pipelines.at(8).bind(buf); + pass.descriptorSets.at(6).bind(buf, this->pipelines.at(8)); + buf.dispatch(threadsX, threadsY, 1); + + // tenth shader + Utils::BarrierBuilder(buf) + .addW2R(this->tempImgs1.at(0)) + .addW2R(this->tempImgs1.at(1)) + .addW2R(this->optImg3) + .addR2W(this->outImg2) + .build(); + + this->pipelines.at(9).bind(buf); + pass.descriptorSets.at(7).bind(buf, this->pipelines.at(9)); + buf.dispatch(threadsX, threadsY, 1); +} diff --git a/lsfg-vk-v3.1p/src/shaders/gamma.cpp b/lsfg-vk-v3.1p/src/shaders/gamma.cpp new file mode 100644 index 0000000..e703d4e --- /dev/null +++ b/lsfg-vk-v3.1p/src/shaders/gamma.cpp @@ -0,0 +1,192 @@ +#include "shaders/gamma.hpp" +#include "common/utils.hpp" +#include "core/commandbuffer.hpp" +#include "core/image.hpp" + +#include + +#include +#include +#include +#include +#include + +using namespace LSFG::Shaders; + +Gamma::Gamma(Vulkan& vk, std::array, 3> inImgs1, + Core::Image inImg2, + 
std::optional optImg) + : inImgs1(std::move(inImgs1)), inImg2(std::move(inImg2)), + optImg(std::move(optImg)) { + // create resources + this->shaderModules = {{ + vk.shaders.getShader(vk.device, "gamma[0]", + { { 1 , VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER }, + { 2, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 9, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 3, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }), + vk.shaders.getShader(vk.device, "gamma[1]", + { { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 3, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }), + vk.shaders.getShader(vk.device, "gamma[2]", + { { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }), + vk.shaders.getShader(vk.device, "gamma[3]", + { { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }), + vk.shaders.getShader(vk.device, "gamma[4]", + { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER }, + { 2, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 6, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }) + }}; + this->pipelines = {{ + vk.shaders.getPipeline(vk.device, "gamma[0]"), + vk.shaders.getPipeline(vk.device, "gamma[1]"), + vk.shaders.getPipeline(vk.device, "gamma[2]"), + vk.shaders.getPipeline(vk.device, "gamma[3]"), + vk.shaders.getPipeline(vk.device, "gamma[4]") + }}; + this->samplers.at(0) = vk.resources.getSampler(vk.device); + this->samplers.at(1) = vk.resources.getSampler(vk.device, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, VK_COMPARE_OP_NEVER, true); + this->samplers.at(2) = vk.resources.getSampler(vk.device, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_COMPARE_OP_ALWAYS, false); + + // create internal images/outputs + const VkExtent2D extent = this->inImgs1.at(0).at(0).getExtent(); + for (size_t i = 0; i < 4; i++) { + this->tempImgs1.at(i) = Core::Image(vk.device, extent); + this->tempImgs2.at(i) = Core::Image(vk.device, extent); + } + + this->outImg = 
Core::Image(vk.device, + { extent.width, extent.height }, + VK_FORMAT_R16G16B16A16_SFLOAT); + + // hook up shaders + for (size_t pass_idx = 0; pass_idx < vk.generationCount; pass_idx++) { + auto& pass = this->passes.emplace_back(); + pass.buffer = vk.resources.getBuffer(vk.device, + static_cast(pass_idx + 1) / static_cast(vk.generationCount + 1), + !this->optImg.has_value()); + for (size_t i = 0; i < 3; i++) { + pass.firstDescriptorSet.at(i) = Core::DescriptorSet(vk.device, vk.descriptorPool, + this->shaderModules.at(0)); + pass.firstDescriptorSet.at(i).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, pass.buffer) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(1)) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(2)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs1.at((i + 2) % 3)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs1.at(i % 3)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(0)) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(1)) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(2)) + .build(); + } + pass.descriptorSets.at(0) = Core::DescriptorSet(vk.device, vk.descriptorPool, + this->shaderModules.at(1)); + pass.descriptorSets.at(0).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(0)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(1)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(2)) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2) + .build(); + pass.descriptorSets.at(1) = Core::DescriptorSet(vk.device, vk.descriptorPool, + this->shaderModules.at(2)); + pass.descriptorSets.at(1).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1) + .build(); + pass.descriptorSets.at(2) = 
Core::DescriptorSet(vk.device, vk.descriptorPool, + this->shaderModules.at(3)); + pass.descriptorSets.at(2).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2) + .build(); + pass.descriptorSets.at(3) = Core::DescriptorSet(vk.device, vk.descriptorPool, + this->shaderModules.at(4)); + pass.descriptorSets.at(3).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, pass.buffer) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0)) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(2)) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg2) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImg) + .build(); + } +} + +void Gamma::Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount, uint64_t pass_idx) { + auto& pass = this->passes.at(pass_idx); + + // first shader + const auto extent = this->tempImgs1.at(0).getExtent(); + const uint32_t threadsX = (extent.width + 7) >> 3; + const uint32_t threadsY = (extent.height + 7) >> 3; + + Utils::BarrierBuilder(buf) + .addW2R(this->inImgs1.at((frameCount + 2) % 3)) + .addW2R(this->inImgs1.at(frameCount % 3)) + .addW2R(this->optImg) + .addR2W(this->tempImgs1.at(0)) + .addR2W(this->tempImgs1.at(1)) + .addR2W(this->tempImgs1.at(2)) + .build(); + + this->pipelines.at(0).bind(buf); + pass.firstDescriptorSet.at(frameCount % 3).bind(buf, this->pipelines.at(0)); + buf.dispatch(threadsX, threadsY, 1); + + // second shader + Utils::BarrierBuilder(buf) + .addW2R(this->tempImgs1.at(0)) + .addW2R(this->tempImgs1.at(1)) + .addW2R(this->tempImgs1.at(2)) + .addR2W(this->tempImgs2) + .build(); + + this->pipelines.at(1).bind(buf); + pass.descriptorSets.at(0).bind(buf, this->pipelines.at(1)); + buf.dispatch(threadsX, threadsY, 1); + + // third shader + Utils::BarrierBuilder(buf) + 
.addW2R(this->tempImgs2) + .addR2W(this->tempImgs1) + .build(); + + this->pipelines.at(2).bind(buf); + pass.descriptorSets.at(1).bind(buf, this->pipelines.at(2)); + buf.dispatch(threadsX, threadsY, 1); + + // fourth shader + Utils::BarrierBuilder(buf) + .addW2R(this->tempImgs1) + .addR2W(this->tempImgs2) + .build(); + + this->pipelines.at(3).bind(buf); + pass.descriptorSets.at(2).bind(buf, this->pipelines.at(3)); + buf.dispatch(threadsX, threadsY, 1); + + // fifth shader + Utils::BarrierBuilder(buf) + .addW2R(this->tempImgs2) + .addW2R(this->optImg) + .addW2R(this->inImg2) + .addR2W(this->outImg) + .build(); + + this->pipelines.at(4).bind(buf); + pass.descriptorSets.at(3).bind(buf, this->pipelines.at(4)); + buf.dispatch(threadsX, threadsY, 1); +} diff --git a/lsfg-vk-v3.1p/src/shaders/generate.cpp b/lsfg-vk-v3.1p/src/shaders/generate.cpp new file mode 100644 index 0000000..dde317c --- /dev/null +++ b/lsfg-vk-v3.1p/src/shaders/generate.cpp @@ -0,0 +1,82 @@ +#include "shaders/generate.hpp" +#include "common/utils.hpp" +#include "core/commandbuffer.hpp" +#include "core/image.hpp" + +#include + +#include +#include +#include +#include + +using namespace LSFG::Shaders; + +Generate::Generate(Vulkan& vk, + Core::Image inImg1, Core::Image inImg2, + Core::Image inImg3, Core::Image inImg4, Core::Image inImg5, + const std::vector& fds, VkFormat format) + : inImg1(std::move(inImg1)), inImg2(std::move(inImg2)), + inImg3(std::move(inImg3)), inImg4(std::move(inImg4)), + inImg5(std::move(inImg5)) { + // create resources + this->shaderModule = vk.shaders.getShader(vk.device, "generate", + { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER }, + { 2, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 5, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }); + this->pipeline = vk.shaders.getPipeline(vk.device, "generate"); + this->samplers.at(0) = vk.resources.getSampler(vk.device); + this->samplers.at(1) = vk.resources.getSampler(vk.device, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, 
VK_COMPARE_OP_ALWAYS); + + // create internal images/outputs + const VkExtent2D extent = this->inImg1.getExtent(); + for (size_t i = 0; i < vk.generationCount; i++) + this->outImgs.emplace_back(vk.device, extent, format, + VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + VK_IMAGE_ASPECT_COLOR_BIT, fds.empty() ? -1 : fds.at(i)); + + // hook up shaders + for (size_t i = 0; i < vk.generationCount; i++) { + auto& pass = this->passes.emplace_back(); + pass.buffer = vk.resources.getBuffer(vk.device, + static_cast(i + 1) / static_cast(vk.generationCount + 1)); + for (size_t j = 0; j < 2; j++) { + pass.descriptorSet.at(j) = Core::DescriptorSet(vk.device, vk.descriptorPool, + this->shaderModule); + pass.descriptorSet.at(j).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, pass.buffer) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, j == 0 ? this->inImg2 : this->inImg1) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, j == 0 ? this->inImg1 : this->inImg2) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg3) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg4) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg5) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs.at(i)) + .build(); + } + } +} + +void Generate::Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount, uint64_t pass_idx) { + auto& pass = this->passes.at(pass_idx); + + // first pass + const auto extent = this->inImg1.getExtent(); + const uint32_t threadsX = (extent.width + 15) >> 4; + const uint32_t threadsY = (extent.height + 15) >> 4; + + Utils::BarrierBuilder(buf) + .addW2R(this->inImg1) + .addW2R(this->inImg2) + .addW2R(this->inImg3) + .addW2R(this->inImg4) + .addW2R(this->inImg5) + .addR2W(this->outImgs.at(pass_idx)) + .build(); + + this->pipeline.bind(buf); + pass.descriptorSet.at(frameCount % 2).bind(buf, this->pipeline); + buf.dispatch(threadsX, threadsY, 1); +} diff --git a/lsfg-vk-v3.1p/src/shaders/mipmaps.cpp 
b/lsfg-vk-v3.1p/src/shaders/mipmaps.cpp new file mode 100644 index 0000000..eab1e91 --- /dev/null +++ b/lsfg-vk-v3.1p/src/shaders/mipmaps.cpp @@ -0,0 +1,65 @@ +#include "shaders/mipmaps.hpp" +#include "common/utils.hpp" +#include "core/image.hpp" +#include "core/commandbuffer.hpp" + +#include + +#include +#include +#include + +using namespace LSFG::Shaders; + +Mipmaps::Mipmaps(Vulkan& vk, + Core::Image inImg_0, Core::Image inImg_1) + : inImg_0(std::move(inImg_0)), inImg_1(std::move(inImg_1)) { + // create resources + this->shaderModule = vk.shaders.getShader(vk.device, "mipmaps", + { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER }, + { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, + { 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, + { 7, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }); + this->pipeline = vk.shaders.getPipeline(vk.device, "mipmaps"); + this->buffer = vk.resources.getBuffer(vk.device); + this->sampler = vk.resources.getSampler(vk.device); + for (size_t i = 0; i < 2; i++) + this->descriptorSets.at(i) = Core::DescriptorSet(vk.device, vk.descriptorPool, this->shaderModule); + + // create outputs + const VkExtent2D flowExtent{ + .width = static_cast( + static_cast(this->inImg_0.getExtent().width) / vk.flowScale), + .height = static_cast( + static_cast(this->inImg_0.getExtent().height) / vk.flowScale) + }; + for (size_t i = 0; i < 7; i++) + this->outImgs.at(i) = Core::Image(vk.device, + { flowExtent.width >> i, flowExtent.height >> i }, + VK_FORMAT_R8_UNORM); + + // hook up shaders + for (size_t fc = 0; fc < 2; fc++) + this->descriptorSets.at(fc).update(vk.device) + .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, this->buffer) + .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->sampler) + .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, (fc % 2 == 0) ? 
this->inImg_0 : this->inImg_1) + .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs) + .build(); +} + +void Mipmaps::Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount) { + // first pass + const auto flowExtent = this->outImgs.at(0).getExtent(); + const uint32_t threadsX = (flowExtent.width + 63) >> 6; + const uint32_t threadsY = (flowExtent.height + 63) >> 6; + + Utils::BarrierBuilder(buf) + .addW2R((frameCount % 2 == 0) ? this->inImg_0 : this->inImg_1) + .addR2W(this->outImgs) + .build(); + + this->pipeline.bind(buf); + this->descriptorSets.at(frameCount % 2).bind(buf, this->pipeline); + buf.dispatch(threadsX, threadsY, 1); +} diff --git a/src/context.cpp b/src/context.cpp index ae1bcdf..e9f5ec8 100644 --- a/src/context.cpp +++ b/src/context.cpp @@ -8,7 +8,8 @@ #include "common/exception.hpp" #include -#include +#include +#include #include #include @@ -33,6 +34,11 @@ LsContext::LsContext(const Hooks::DeviceInfo& info, VkSwapchainKHR swapchain, ? *lsfgHdrStr == '1' : false; + const char* lsfgPerfModeStr = getenv("LSFG_PERF_MODE"); + const bool perfMode = lsfgPerfModeStr + ? *lsfgPerfModeStr == '1' + : false; + // we could take the format from the swapchain, // but honestly this is safer. 
const VkFormat format = isHdr @@ -72,11 +78,21 @@ LsContext::LsContext(const Hooks::DeviceInfo& info, VkSwapchainKHR swapchain, i, out_n_fds.at(i)); } + auto* lsfgInitialize = LSFG_3_1::initialize; + auto* lsfgCreateContext = LSFG_3_1::createContext; + auto* lsfgDeleteContext = LSFG_3_1::deleteContext; + if (perfMode) { + Log::debug("context", "Using performance mode"); + this->isPerfMode = true; + lsfgInitialize = LSFG_3_1P::initialize; + lsfgCreateContext = LSFG_3_1P::createContext; + lsfgDeleteContext = LSFG_3_1P::deleteContext; + } // initialize lsfg Log::debug("context", "(entering LSFG initialization)"); setenv("DISABLE_LSFG", "1", 1); // NOLINT Extract::extractShaders(); - LSFG_3_1::initialize( + lsfgInitialize( Utils::getDeviceUUID(info.physicalDevice), isHdr, 1.0F / flowScale, info.frameGen, [](const std::string& name) { @@ -91,12 +107,12 @@ LsContext::LsContext(const Hooks::DeviceInfo& info, VkSwapchainKHR swapchain, // create lsfg context Log::debug("context", "(entering LSFG context creation)"); this->lsfgCtxId = std::shared_ptr( - new int32_t(LSFG_3_1::createContext(frame_0_fd, frame_1_fd, out_n_fds, + new int32_t(lsfgCreateContext(frame_0_fd, frame_1_fd, out_n_fds, extent, format)), - [](const int32_t* id) { + [lsfgDeleteContext = lsfgDeleteContext](const int32_t* id) { Log::info("context", "(entering LSFG context deletion with id: {})", *id); - LSFG_3_1::deleteContext(*id); + lsfgDeleteContext(*id); Log::info("context", "(exiting LSFG context deletion with id: {})", *id); } @@ -157,9 +173,15 @@ VkResult LsContext::present(const Hooks::DeviceInfo& info, const void* pNext, Vk Log::debug("context2", "(entering LSFG present with id: {})", *this->lsfgCtxId); - LSFG_3_1::presentContext(*this->lsfgCtxId, - preCopySemaphoreFd, - renderSemaphoreFds); + if (this->isPerfMode) { + LSFG_3_1P::presentContext(*this->lsfgCtxId, + preCopySemaphoreFd, + renderSemaphoreFds); + } else { + LSFG_3_1::presentContext(*this->lsfgCtxId, + preCopySemaphoreFd, + 
renderSemaphoreFds); + } Log::debug("context2", "(exiting LSFG present with id: {})", *this->lsfgCtxId); diff --git a/src/utils/benchmark.cpp b/src/utils/benchmark.cpp index cb1afbf..47b94fc 100644 --- a/src/utils/benchmark.cpp +++ b/src/utils/benchmark.cpp @@ -3,7 +3,8 @@ #include "utils/log.hpp" #include -#include +#include +#include #include #include @@ -34,6 +35,7 @@ namespace { const char* lsfgMultiplier = std::getenv("LSFG_MULTIPLIER"); const char* lsfgExtentWidth = std::getenv("LSFG_EXTENT_WIDTH"); const char* lsfgExtentHeight = std::getenv("LSFG_EXTENT_HEIGHT"); + const char* lsfgPerfMode = std::getenv("LSFG_PERF_MODE"); const float flowScale = lsfgFlowScale ? std::stof(lsfgFlowScale) : 1.0F; @@ -45,6 +47,17 @@ namespace { ? static_cast(std::stoul(lsfgExtentWidth)) : 1920; const uint32_t height = lsfgExtentHeight ? static_cast(std::stoul(lsfgExtentHeight)) : 1080; + const bool perfMode = lsfgPerfMode + ? *lsfgPerfMode == '1' : false; + + auto* lsfgInitialize = LSFG_3_1::initialize; + auto* lsfgCreateContext = LSFG_3_1::createContext; + auto* lsfgPresentContext = LSFG_3_1::presentContext; + if (perfMode) { + lsfgInitialize = LSFG_3_1P::initialize; + lsfgCreateContext = LSFG_3_1P::createContext; + lsfgPresentContext = LSFG_3_1P::presentContext; + } Log::info("bench", "Running {}x benchmark with {}x{} extent and flow scale of {} {} HDR", multiplier, width, height, flowScale, isHdr ? "with" : "without"); @@ -55,7 +68,7 @@ namespace { ? std::stoull(std::string(lsfgDeviceUUID), nullptr, 16) : 0x1463ABAC; Extract::extractShaders(); - LSFG_3_1::initialize( + lsfgInitialize( deviceUUID, // some magic number if not given isHdr, 1.0F / flowScale, multiplier - 1, [](const std::string& name) -> std::vector { @@ -64,7 +77,7 @@ namespace { return spirv; } ); - const int32_t ctx = LSFG_3_1::createContext(-1, -1, {}, + const int32_t ctx = lsfgCreateContext(-1, -1, {}, { .width = width, .height = height }, isHdr ? 
VK_FORMAT_R16G16B16A16_SFLOAT : VK_FORMAT_R8G8B8A8_UNORM ); @@ -75,7 +88,7 @@ namespace { const auto now = std::chrono::high_resolution_clock::now(); const uint64_t iterations = (8 * 500) + 1; for (uint64_t count = 0; count < iterations; count++) { - LSFG_3_1::presentContext(ctx, -1, {}); + lsfgPresentContext(ctx, -1, {}); if (count % 500 == 0) Log::info("bench", "{:.2f}% done ({}/{})",