performance mode base

This commit is contained in:
PancakeTAS 2025-07-12 18:37:44 +02:00
parent 82bcf2bb72
commit d8a9899747
No known key found for this signature in database
28 changed files with 1933 additions and 15 deletions

View file

@ -16,6 +16,7 @@ include(cmake/FetchPeParse.cmake)
add_subdirectory(lsfg-vk-common)
add_subdirectory(lsfg-vk-v3.1)
add_subdirectory(lsfg-vk-v3.1p)
# main project
project(lsfg-vk
@ -38,8 +39,9 @@ set_target_properties(lsfg-vk PROPERTIES
CXX_STANDARD_REQUIRED ON)
target_include_directories(lsfg-vk
PRIVATE include)
target_link_libraries(lsfg-vk
PRIVATE lsfg-vk-common lsfg-vk-v3.1 peparse dxvk vulkan)
target_link_libraries(lsfg-vk PRIVATE
lsfg-vk-common lsfg-vk-v3.1 lsfg-vk-v3.1p
peparse dxvk vulkan)
if(CMAKE_BUILD_TYPE STREQUAL "Release")
set_target_properties(lsfg-vk PROPERTIES

View file

@ -63,6 +63,7 @@ private:
Mini::CommandPool cmdPool;
uint64_t frameIdx{0};
bool isPerfMode{false};
struct RenderPassInfo {
Mini::CommandBuffer preCopyBuf; // copy from swapchain image to frame_0/frame_1

View file

@ -1,4 +1,4 @@
#include "lsfg.hpp"
#include "lsfg_3_1.hpp"
#include "context.hpp"
#include "core/commandpool.hpp"
#include "core/descriptorpool.hpp"

26
lsfg-vk-v3.1p/.clang-tidy Normal file
View file

@ -0,0 +1,26 @@
Checks:
# enable basic checks
- "clang-analyzer-*"
# configure performance checks
- "performance-*"
- "-performance-enum-size"
# configure readability and bugprone checks
- "readability-*"
- "bugprone-*"
- "misc-*"
- "-readability-braces-around-statements"
- "-readability-function-cognitive-complexity"
- "-readability-identifier-length"
- "-readability-implicit-bool-conversion"
- "-readability-magic-numbers"
- "-readability-math-missing-parentheses"
- "-bugprone-easily-swappable-parameters"
# configure modernization
- "modernize-*"
- "-modernize-use-trailing-return-type"
# configure cppcoreguidelines
- "cppcoreguidelines-*"
- "-cppcoreguidelines-avoid-magic-numbers"
- "-cppcoreguidelines-pro-type-reinterpret-cast" # allows reinterpret_cast
- "-cppcoreguidelines-avoid-non-const-global-variables"
- "-cppcoreguidelines-pro-type-union-access"

3
lsfg-vk-v3.1p/.gitattributes vendored Normal file
View file

@ -0,0 +1,3 @@
*.cpp diff=cpp eol=lf
*.hpp diff=cpp eol=lf
*.md diff=markdown eol=lf

9
lsfg-vk-v3.1p/.gitignore vendored Normal file
View file

@ -0,0 +1,9 @@
# cmake files
/build
# ide/lsp files
/.zed
/.vscode
/.clangd
/.cache
/.ccls

View file

@ -0,0 +1,62 @@
# Build script for the lsfg-vk-v3.1p static library (frame generation,
# performance mode). Consumed by the parent project via add_subdirectory().
cmake_minimum_required(VERSION 3.29)
# project
project(lsfg-vk-v3.1p
DESCRIPTION "Lossless Scaling Frame Generation v3.1 (Performance Mode)"
LANGUAGES CXX)
# collect all translation units; the glob is only evaluated when CMake
# configures, so re-run CMake after adding or removing source files
file(GLOB SOURCES
"src/core/*.cpp"
"src/pool/*.cpp"
"src/shaders/*.cpp"
"src/utils/*.cpp"
"src/*.cpp"
)
add_library(lsfg-vk-v3.1p STATIC ${SOURCES})
# target
set_target_properties(lsfg-vk-v3.1p PROPERTIES
CXX_STANDARD 20
CXX_STANDARD_REQUIRED ON)
# "include" holds internal headers, "public" is exported to consumers
target_include_directories(lsfg-vk-v3.1p
PRIVATE include
PUBLIC public)
target_link_libraries(lsfg-vk-v3.1p
PUBLIC lsfg-vk-common vulkan)
# position independent code, so the static library can be linked into a shared object
target_compile_options(lsfg-vk-v3.1p PRIVATE
-fPIC)
# link-time optimization for release builds only
if(CMAKE_BUILD_TYPE STREQUAL "Release")
set_target_properties(lsfg-vk-v3.1p PROPERTIES
INTERPROCEDURAL_OPTIMIZATION ON)
endif()
# diagnostics
if(CMAKE_BUILD_TYPE STREQUAL "Debug")
set_target_properties(lsfg-vk-v3.1p PROPERTIES
EXPORT_COMPILE_COMMANDS ON)
endif()
# opt-in: enable (almost) every warning and run clang-tidy during compilation;
# NOTE(review): -Weverything is a Clang-only flag, so this presumably requires Clang
if(LSFGVK_EXCESS_DEBUG)
target_compile_options(lsfg-vk-v3.1p PRIVATE
-Weverything
# disable compat c++ flags
-Wno-pre-c++20-compat-pedantic
-Wno-pre-c++17-compat
-Wno-c++98-compat-pedantic
-Wno-c++98-compat
# disable other flags
-Wno-missing-designated-field-initializers
-Wno-shadow # allow shadowing
-Wno-switch-enum # ignore missing cases
-Wno-switch-default # ignore missing default
-Wno-padded # ignore automatic padding
-Wno-exit-time-destructors # allow globals
-Wno-global-constructors # allow globals
-Wno-cast-function-type-strict # for vulkan
)
set_target_properties(lsfg-vk-v3.1p PROPERTIES
CXX_CLANG_TIDY clang-tidy)
endif()

21
lsfg-vk-v3.1p/LICENSE.md Normal file
View file

@ -0,0 +1,21 @@
## MIT License
Copyright (c) 2025 lsfg-vk
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

14
lsfg-vk-v3.1p/README.md Normal file
View file

@ -0,0 +1,14 @@
## lsfg-vk-v3.1p
Version 3.1 (Performance Mode) of Lossless Scaling Frame Generation
This is a subproject of lsfg-vk and contains the external Vulkan logic for generating frames.
The project is intentionally structured as a fully external project, such that it can be integrated into other applications.
### Interface
Interfacing with lsfg-vk-v3.1p is done via the `lsfg_3_1p.hpp` header. The internal Vulkan instance is created using `LSFG_3_1P::initialize()` and requires a specific deviceUUID, as well as parts of the lsfg-vk configuration, including a function loading SPIR-V shaders by name. Cleanup is done via `LSFG_3_1P::finalize()`, after which `LSFG_3_1P::initialize()` may be called again. Please note that the initialization process is expensive and may take a while. It is recommended to call this function once during the application's lifetime.
Once the format and extent of the requested images are determined, `LSFG_3_1P::createContext()` should be called to initialize a frame generation context. The Vulkan images are created from backing memory, which is passed through the file descriptor arguments. A context can be destroyed using `LSFG_3_1P::deleteContext()`.
Presenting the context can be done via `LSFG_3_1P::presentContext()`. Before calling the function a second time, make sure the outgoing semaphores have been signaled.

View file

@ -0,0 +1,83 @@
#pragma once
#include "core/image.hpp"
#include "core/semaphore.hpp"
#include "core/fence.hpp"
#include "core/commandbuffer.hpp"
#include "shaders/alpha.hpp"
#include "shaders/beta.hpp"
#include "shaders/delta.hpp"
#include "shaders/gamma.hpp"
#include "shaders/generate.hpp"
#include "shaders/mipmaps.hpp"
#include "common/utils.hpp"
#include <vulkan/vulkan_core.h>
#include <vector>
#include <cstdint>
#include <array>
namespace LSFG {
///
/// A frame generation context.
///
/// Holds the two imported input images, a ring of per-frame synchronization
/// and command buffer state, and every shader chain that is recorded when
/// presenting.
///
class Context {
public:
///
/// Create a context
///
/// @param vk The Vulkan instance to use.
/// @param in0 File descriptor for the first input image.
/// @param in1 File descriptor for the second input image.
/// @param outN File descriptors for the output images.
/// @param extent The size of the images.
/// @param format The format of the images.
///
/// @throws LSFG::vulkan_error if the context fails to initialize.
///
Context(Vulkan& vk,
int in0, int in1, const std::vector<int>& outN,
VkExtent2D extent, VkFormat format);
///
/// Present on the context.
///
/// @param vk The Vulkan instance to use.
/// @param inSem Semaphore to wait on before starting the generation.
/// A negative value disables both the wait on the input semaphore
/// and the signaling of the output semaphores.
/// @param outSem Semaphores to signal after each generation is done.
/// May be empty, in which case -1 is used for every pass.
///
/// @throws LSFG::vulkan_error if the context fails to present.
///
void present(Vulkan& vk,
int inSem, const std::vector<int>& outSem);
// Copy, move and destruction all use the compiler-generated defaults
Context(const Context&) = default;
Context& operator=(const Context&) = default;
Context(Context&&) = default;
Context& operator=(Context&&) = default;
~Context() = default;
private:
Core::Image inImg_0, inImg_1; // inImg_0 is next when fc % 2 == 0 (fc presumably being the frame count)
uint64_t frameIdx{0}; // incremented at the end of every present() call
// Synchronization objects and command buffers of one in-flight frame.
struct RenderData {
Core::Semaphore inSemaphore; // signaled when input is ready
std::vector<Core::Semaphore> internalSemaphores; // signaled when first step is done
std::vector<Core::Semaphore> outSemaphores; // signaled when each pass is done
std::vector<Core::Fence> completionFences; // fence for completion of each pass
Core::CommandBuffer cmdBuffer1; // command buffer for the first step
std::vector<Core::CommandBuffer> cmdBuffers2; // command buffers for second step
bool shouldWait{false}; // set once the slot has been used; present() then waits on completionFences first
};
std::array<RenderData, 8> data; // ring of frame slots, selected by frameIdx % 8
Shaders::Mipmaps mipmaps;
std::array<Shaders::Alpha, 7> alpha; // one per mipmap output image
Shaders::Beta beta;
std::array<Shaders::Gamma, 7> gamma;
std::array<Shaders::Delta, 3> delta; // only created alongside the last three gamma stages
Shaders::Generate generate;
};
}

View file

@ -0,0 +1,60 @@
#pragma once
#include "core/commandbuffer.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/sampler.hpp"
#include "core/shadermodule.hpp"
#include "common/utils.hpp"
#include <array>
#include <cstdint>
namespace LSFG::Shaders {
///
/// Alpha shader.
///
/// A chain of four compute passes that turns one input image into a set of
/// four quarter-resolution output images. Three such sets are kept and the
/// one written by a dispatch is selected by the frame count modulo 3.
///
class Alpha {
public:
Alpha() = default;
///
/// Initialize the shaderchain.
///
/// @param vk The Vulkan instance to use.
/// @param inImg One mipmap level
///
/// @throws LSFG::vulkan_error if resource creation fails.
///
Alpha(Vulkan& vk, Core::Image inImg);
///
/// Dispatch the shaderchain.
///
/// @param buf Command buffer to record the four passes into.
/// @param frameCount Current frame count; selects the output image set (frameCount % 3).
///
void Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount);
/// Get the output images
[[nodiscard]] const auto& getOutImages() const { return this->outImgs; }
/// Copy, move and destruction all use the compiler-generated defaults
Alpha(const Alpha&) noexcept = default;
Alpha& operator=(const Alpha&) noexcept = default;
Alpha(Alpha&&) noexcept = default;
Alpha& operator=(Alpha&&) noexcept = default;
~Alpha() = default;
private:
std::array<Core::ShaderModule, 4> shaderModules; // "alpha[0]" .. "alpha[3]"
std::array<Core::Pipeline, 4> pipelines; // one pipeline per shader module
Core::Sampler sampler;
std::array<Core::DescriptorSet, 3> descriptorSets; // passes one to three
std::array<Core::DescriptorSet, 3> lastDescriptorSet; // fourth pass, one per output image set
Core::Image inImg;
std::array<Core::Image, 2> tempImgs1; // half extent, written by the first pass
std::array<Core::Image, 2> tempImgs2; // half extent, written by the second pass
std::array<Core::Image, 4> tempImgs3; // quarter extent, written by the third pass
std::array<std::array<Core::Image, 4>, 3> outImgs; // quarter extent, indexed by [frameCount % 3][image]
};
}

View file

@ -0,0 +1,61 @@
#pragma once
#include "core/buffer.hpp"
#include "core/commandbuffer.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/sampler.hpp"
#include "core/shadermodule.hpp"
#include "common/utils.hpp"
#include <array>
#include <cstdint>
namespace LSFG::Shaders {
///
/// Beta shader.
///
/// A chain of five compute passes ("beta[0]" .. "beta[4]") operating on the
/// output image sets of an Alpha shaderchain.
///
class Beta {
public:
Beta() = default;
///
/// Initialize the shaderchain.
///
/// @param vk The Vulkan instance to use.
/// @param inImgs Three sets of four RGBA images, corresponding to a frame count % 3.
///
/// @throws LSFG::vulkan_error if resource creation fails.
///
Beta(Vulkan& vk, std::array<std::array<Core::Image, 4>, 3> inImgs);
///
/// Dispatch the shaderchain.
///
/// @param buf Command buffer to record into.
/// @param frameCount Current frame count.
///
void Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount);
/// Get the output images
[[nodiscard]] const auto& getOutImages() const { return this->outImgs; }
/// Copy, move and destruction all use the compiler-generated defaults
Beta(const Beta&) noexcept = default;
Beta& operator=(const Beta&) noexcept = default;
Beta(Beta&&) noexcept = default;
Beta& operator=(Beta&&) noexcept = default;
~Beta() = default;
private:
std::array<Core::ShaderModule, 5> shaderModules; // "beta[0]" .. "beta[4]"
std::array<Core::Pipeline, 5> pipelines; // one pipeline per shader module
std::array<Core::Sampler, 2> samplers;
Core::Buffer buffer;
std::array<Core::DescriptorSet, 3> firstDescriptorSet; // presumably one per input image set (frame count % 3) - confirm in beta.cpp
std::array<Core::DescriptorSet, 4> descriptorSets;
std::array<std::array<Core::Image, 4>, 3> inImgs;
std::array<Core::Image, 2> tempImgs1;
std::array<Core::Image, 2> tempImgs2;
std::array<Core::Image, 6> outImgs;
};
}

View file

@ -0,0 +1,79 @@
#pragma once
#include "core/buffer.hpp"
#include "core/commandbuffer.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/sampler.hpp"
#include "core/shadermodule.hpp"
#include "common/utils.hpp"
#include <array>
#include <cstdint>
#include <optional>
#include <vector>
namespace LSFG::Shaders {
///
/// Delta shader.
///
/// Produces two output images per generation pass. Instances can be chained:
/// the outputs of a previous Delta (and Gamma) stage are passed in as the
/// optional images.
///
class Delta {
public:
Delta() = default;
///
/// Initialize the shaderchain.
///
/// @param vk The Vulkan instance to use.
/// @param inImgs1 Three sets of four RGBA images, corresponding to a frame count % 3.
/// @param inImg2 Second Input image
/// @param optImg1 Optional image for non-first passes.
/// @param optImg2 Second optional image for non-first passes.
/// @param optImg3 Third optional image for non-first passes.
///
/// @throws LSFG::vulkan_error if resource creation fails.
///
Delta(Vulkan& vk, std::array<std::array<Core::Image, 4>, 3> inImgs1,
Core::Image inImg2,
std::optional<Core::Image> optImg1,
std::optional<Core::Image> optImg2,
std::optional<Core::Image> optImg3);
///
/// Dispatch the shaderchain.
///
/// @param buf Command buffer to record into.
/// @param frameCount Current frame count.
/// @param pass_idx Index of the generation pass being recorded.
///
void Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount, uint64_t pass_idx);
/// Get the first output image
[[nodiscard]] const auto& getOutImage1() const { return this->outImg1; }
/// Get the second output image
[[nodiscard]] const auto& getOutImage2() const { return this->outImg2; }
/// Copy, move and destruction all use the compiler-generated defaults
Delta(const Delta&) noexcept = default;
Delta& operator=(const Delta&) noexcept = default;
Delta(Delta&&) noexcept = default;
Delta& operator=(Delta&&) noexcept = default;
~Delta() = default;
private:
std::array<Core::ShaderModule, 10> shaderModules;
std::array<Core::Pipeline, 10> pipelines;
std::array<Core::Sampler, 3> samplers;
// Resources that exist once per generation pass.
struct DeltaPass {
Core::Buffer buffer;
std::array<Core::DescriptorSet, 3> firstDescriptorSet;
std::array<Core::DescriptorSet, 8> descriptorSets;
std::array<Core::DescriptorSet, 3> sixthDescriptorSet;
};
std::vector<DeltaPass> passes; // presumably indexed by pass_idx - confirm in delta.cpp
std::array<std::array<Core::Image, 4>, 3> inImgs1;
Core::Image inImg2;
std::optional<Core::Image> optImg1, optImg2, optImg3;
std::array<Core::Image, 4> tempImgs1;
std::array<Core::Image, 4> tempImgs2;
Core::Image outImg1, outImg2;
};
}

View file

@ -0,0 +1,71 @@
#pragma once
#include "core/buffer.hpp"
#include "core/commandbuffer.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/sampler.hpp"
#include "core/shadermodule.hpp"
#include "common/utils.hpp"
#include <array>
#include <cstdint>
#include <optional>
#include <vector>
namespace LSFG::Shaders {
///
/// Gamma shader.
///
/// Produces one output image per generation pass. Instances can be chained:
/// the output of a previous Gamma stage is passed in as the optional image.
///
class Gamma {
public:
Gamma() = default;
///
/// Initialize the shaderchain.
///
/// @param vk The Vulkan instance to use.
/// @param inImgs1 Three sets of four RGBA images, corresponding to a frame count % 3.
/// @param inImg2 Second Input image
/// @param optImg Optional image for non-first passes.
///
/// @throws LSFG::vulkan_error if resource creation fails.
///
Gamma(Vulkan& vk, std::array<std::array<Core::Image, 4>, 3> inImgs1,
Core::Image inImg2, std::optional<Core::Image> optImg);
///
/// Dispatch the shaderchain.
///
/// @param buf Command buffer to record into.
/// @param frameCount Current frame count.
/// @param pass_idx Index of the generation pass being recorded.
///
void Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount, uint64_t pass_idx);
/// Get the output image
[[nodiscard]] const auto& getOutImage() const { return this->outImg; }
/// Copy, move and destruction all use the compiler-generated defaults
Gamma(const Gamma&) noexcept = default;
Gamma& operator=(const Gamma&) noexcept = default;
Gamma(Gamma&&) noexcept = default;
Gamma& operator=(Gamma&&) noexcept = default;
~Gamma() = default;
private:
std::array<Core::ShaderModule, 5> shaderModules;
std::array<Core::Pipeline, 5> pipelines;
std::array<Core::Sampler, 3> samplers;
// Resources that exist once per generation pass.
struct GammaPass {
Core::Buffer buffer;
std::array<Core::DescriptorSet, 3> firstDescriptorSet;
std::array<Core::DescriptorSet, 4> descriptorSets;
};
std::vector<GammaPass> passes; // presumably indexed by pass_idx - confirm in gamma.cpp
std::array<std::array<Core::Image, 4>, 3> inImgs1;
Core::Image inImg2;
std::optional<Core::Image> optImg;
std::array<Core::Image, 4> tempImgs1;
std::array<Core::Image, 4> tempImgs2;
Core::Image outImg;
};
}

View file

@ -0,0 +1,70 @@
#pragma once
#include "core/buffer.hpp"
#include "core/commandbuffer.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/sampler.hpp"
#include "core/shadermodule.hpp"
#include "common/utils.hpp"
#include <vulkan/vulkan_core.h>
#include <array>
#include <vector>
#include <cstdint>
namespace LSFG::Shaders {
///
/// Generate shader.
///
/// Final stage of the chain: a single pipeline that writes into the output
/// images imported from file descriptors.
///
class Generate {
public:
Generate() = default;
///
/// Initialize the shaderchain.
///
/// @param vk The Vulkan instance to use.
/// @param inImg1 Input image 1.
/// @param inImg2 Input image 2.
/// @param inImg3 Input image 3.
/// @param inImg4 Input image 4.
/// @param inImg5 Input image 5.
/// @param fds File descriptors for the output images.
/// @param format Image format supplied by the context; presumably the format of the output images - confirm in generate.cpp.
///
/// @throws LSFG::vulkan_error if resource creation fails.
///
Generate(Vulkan& vk,
Core::Image inImg1, Core::Image inImg2,
Core::Image inImg3, Core::Image inImg4, Core::Image inImg5,
const std::vector<int>& fds, VkFormat format);
///
/// Dispatch the shaderchain.
///
/// @param buf Command buffer to record into.
/// @param frameCount Current frame count.
/// @param pass_idx Index of the generation pass being recorded.
///
void Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount, uint64_t pass_idx);
/// Copy, move and destruction all use the compiler-generated defaults
Generate(const Generate&) noexcept = default;
Generate& operator=(const Generate&) noexcept = default;
Generate(Generate&&) noexcept = default;
Generate& operator=(Generate&&) noexcept = default;
~Generate() = default;
private:
Core::ShaderModule shaderModule;
Core::Pipeline pipeline;
std::array<Core::Sampler, 2> samplers;
// Resources that exist once per generation pass.
struct GeneratePass {
Core::Buffer buffer;
std::array<Core::DescriptorSet, 2> descriptorSet;
};
std::vector<GeneratePass> passes; // presumably indexed by pass_idx - confirm in generate.cpp
Core::Image inImg1, inImg2;
Core::Image inImg3, inImg4, inImg5;
std::vector<Core::Image> outImgs;
};
}

View file

@ -0,0 +1,59 @@
#pragma once
#include "core/buffer.hpp"
#include "core/commandbuffer.hpp"
#include "core/descriptorset.hpp"
#include "core/image.hpp"
#include "core/pipeline.hpp"
#include "core/sampler.hpp"
#include "core/shadermodule.hpp"
#include "common/utils.hpp"
#include <array>
#include <cstdint>
namespace LSFG::Shaders {
///
/// Mipmaps shader.
///
/// First stage of the chain: takes the two alternating input frames and
/// exposes seven output images.
///
class Mipmaps {
public:
Mipmaps() = default;
///
/// Initialize the shaderchain.
///
/// @param vk The Vulkan instance to use.
/// @param inImg_0 The next frame (when fc % 2 == 0)
/// @param inImg_1 The next frame (when fc % 2 == 1)
///
/// @throws LSFG::vulkan_error if resource creation fails.
///
Mipmaps(Vulkan& vk, Core::Image inImg_0, Core::Image inImg_1);
///
/// Dispatch the shaderchain.
///
/// @param buf Command buffer to record into.
/// @param frameCount Current frame count.
///
void Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount);
/// Get the output images.
[[nodiscard]] const auto& getOutImages() const { return this->outImgs; }
/// Copy, move and destruction all use the compiler-generated defaults
Mipmaps(const Mipmaps&) noexcept = default;
Mipmaps& operator=(const Mipmaps&) noexcept = default;
Mipmaps(Mipmaps&&) noexcept = default;
Mipmaps& operator=(Mipmaps&&) noexcept = default;
~Mipmaps() = default;
private:
Core::ShaderModule shaderModule;
Core::Pipeline pipeline;
Core::Buffer buffer;
Core::Sampler sampler;
std::array<Core::DescriptorSet, 2> descriptorSets; // presumably one per input image (fc % 2) - confirm in mipmaps.cpp
Core::Image inImg_0, inImg_1;
std::array<Core::Image, 7> outImgs;
};
}

View file

@ -0,0 +1,66 @@
#pragma once
#include <vulkan/vulkan_core.h>
#include <functional>
#include <cstdint>
#include <string>
#include <vector>
///
/// Public interface of lsfg-vk-v3.1p (performance mode frame generation).
///
namespace LSFG_3_1P {
///
/// Initialize the LSFG library.
///
/// Does nothing if the library is already initialized.
///
/// @param deviceUUID The UUID of the Vulkan device to use.
/// @param isHdr Whether the images are in HDR format.
/// @param flowScale Internal flow scale factor.
/// @param generationCount Number of frames to generate.
/// @param loader Function to load shader source code by name.
///
/// @throws LSFG::vulkan_error if Vulkan objects fail to initialize.
///
void initialize(uint64_t deviceUUID,
bool isHdr, float flowScale, uint64_t generationCount,
const std::function<std::vector<uint8_t>(const std::string&)>& loader);
///
/// Create a new LSFG context on a swapchain.
///
/// @param in0 File descriptor for the first input image.
/// @param in1 File descriptor for the second input image.
/// @param outN File descriptor for each output image. This defines the LSFG level.
/// @param extent The size of the images
/// @param format The format of the images.
/// @return A unique identifier for the created context.
///
/// @throws LSFG::vulkan_error if the library is not initialized or the context cannot be created.
///
int32_t createContext(
int in0, int in1, const std::vector<int>& outN,
VkExtent2D extent, VkFormat format);
///
/// Present a context.
///
/// @param id Unique identifier of the context to present.
/// @param inSem Semaphore to wait on before starting the generation.
/// A negative value disables both the wait on the input semaphore
/// and the signaling of the output semaphores.
/// @param outSem Semaphores to signal once each output image is ready.
/// May be empty, in which case -1 is used for every pass.
///
/// @throws LSFG::vulkan_error if the library is not initialized, the context
/// does not exist or the context cannot be presented.
///
void presentContext(int32_t id, int inSem, const std::vector<int>& outSem);
///
/// Delete an LSFG context.
///
/// Waits for the device to become idle before destroying the context.
///
/// @param id Unique identifier of the context to delete.
///
/// @throws LSFG::vulkan_error if the library is not initialized or the context does not exist.
///
void deleteContext(int32_t id);
///
/// Deinitialize the LSFG library.
///
/// Destroys all remaining contexts. Does nothing if the library is not initialized.
///
void finalize();
}

View file

@ -0,0 +1,121 @@
#include "context.hpp"
#include "common/utils.hpp"
#include "common/exception.hpp"
#include <vulkan/vulkan_core.h>
#include <vector>
#include <cstddef>
#include <algorithm>
#include <optional>
#include <cstdint>
using namespace LSFG;
///
/// Build a frame generation context: import the input images, size the
/// per-frame render data and wire up every shader chain.
///
/// The chains are created strictly in dependency order, as each stage is
/// handed the output images of the stages created before it.
///
Context::Context(Vulkan& vk,
        int in0, int in1, const std::vector<int>& outN,
        VkExtent2D extent, VkFormat format) {
    // import both input images from the file descriptors supplied by the caller
    this->inImg_0 = Core::Image(vk.device, extent, format,
        VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
        VK_IMAGE_ASPECT_COLOR_BIT, in0);
    this->inImg_1 = Core::Image(vk.device, extent, format,
        VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
        VK_IMAGE_ASPECT_COLOR_BIT, in1);

    // every frame slot needs one set of sync objects and one command buffer
    // per generated frame
    for (auto& slot : this->data) {
        slot.internalSemaphores.resize(vk.generationCount);
        slot.outSemaphores.resize(vk.generationCount);
        slot.completionFences.resize(vk.generationCount);
        slot.cmdBuffers2.resize(vk.generationCount);
    }

    // mipmaps feed one alpha chain per output image
    this->mipmaps = Shaders::Mipmaps(vk, this->inImg_0, this->inImg_1);
    for (size_t level = 0; level < 7; level++)
        this->alpha.at(level) = Shaders::Alpha(vk, this->mipmaps.getOutImages().at(level));

    this->beta = Shaders::Beta(vk, this->alpha.at(0).getOutImages());

    // gamma stages walk the alpha chains from last to first; the final three
    // gamma stages are each accompanied by a delta stage
    for (size_t stage = 0; stage < 7; stage++) {
        const size_t level = 6 - stage;

        // every gamma stage but the first consumes the previous gamma output
        std::optional<Core::Image> previousGamma;
        if (stage != 0)
            previousGamma = this->gamma.at(stage - 1).getOutImage();

        this->gamma.at(stage) = Shaders::Gamma(vk,
            this->alpha.at(level).getOutImages(),
            this->beta.getOutImages().at(std::min<size_t>(level, 5)),
            previousGamma);

        if (stage >= 4) {
            // the first delta stage has no predecessors to read from
            std::optional<Core::Image> gammaInput;
            std::optional<Core::Image> deltaInput1;
            std::optional<Core::Image> deltaInput2;
            if (stage != 4) {
                gammaInput = this->gamma.at(stage - 1).getOutImage();
                deltaInput1 = this->delta.at(stage - 5).getOutImage1();
                deltaInput2 = this->delta.at(stage - 5).getOutImage2();
            }

            this->delta.at(stage - 4) = Shaders::Delta(vk,
                this->alpha.at(level).getOutImages(),
                this->beta.getOutImages().at(level),
                gammaInput, deltaInput1, deltaInput2);
        }
    }

    // the generate stage combines the inputs with the final gamma/delta outputs
    this->generate = Shaders::Generate(vk,
        this->inImg_0, this->inImg_1,
        this->gamma.at(6).getOutImage(),
        this->delta.at(2).getOutImage1(),
        this->delta.at(2).getOutImage2(),
        outN, format);
}
///
/// Record and submit the work for one presented frame.
///
/// The first submission processes the new input frame (mipmaps, alpha, beta),
/// followed by one submission per generated frame (gamma, delta, generate).
/// A negative inSem disables waiting on the input semaphore as well as
/// signaling the output semaphores.
///
void Context::present(Vulkan& vk,
        int inSem, const std::vector<int>& outSem) {
    auto& slot = this->data.at(this->frameIdx % 8);
    const bool externalSync = inSem >= 0;

    // 3. before reusing this slot, wait for the work submitted from it earlier
    if (slot.shouldWait) {
        for (auto& fence : slot.completionFences) {
            if (!fence.wait(vk.device, UINT64_MAX))
                throw LSFG::vulkan_error(VK_TIMEOUT, "Fence wait timed out");
        }
    }
    slot.shouldWait = true;

    // 1. create mipmaps and process input image
    if (externalSync)
        slot.inSemaphore = Core::Semaphore(vk.device, inSem);
    for (size_t pass = 0; pass < vk.generationCount; pass++)
        slot.internalSemaphores.at(pass) = Core::Semaphore(vk.device);

    auto& firstStep = slot.cmdBuffer1;
    firstStep = Core::CommandBuffer(vk.device, vk.commandPool);
    firstStep.begin();

    this->mipmaps.Dispatch(firstStep, this->frameIdx);
    for (size_t i = 0; i < 7; i++)
        this->alpha.at(6 - i).Dispatch(firstStep, this->frameIdx);
    this->beta.Dispatch(firstStep, this->frameIdx);

    firstStep.end();

    // only wait on the input semaphore if one was provided
    std::vector<Core::Semaphore> waits;
    if (externalSync)
        waits.push_back(slot.inSemaphore);
    firstStep.submit(vk.device.getComputeQueue(), std::nullopt,
        waits, std::nullopt,
        slot.internalSemaphores, std::nullopt);

    // 2. generate intermediary frames
    for (size_t pass = 0; pass < vk.generationCount; pass++) {
        auto& firstStepDone = slot.internalSemaphores.at(pass);

        auto& passDone = slot.outSemaphores.at(pass);
        if (externalSync) {
            const int outFd = outSem.empty() ? -1 : outSem.at(pass);
            passDone = Core::Semaphore(vk.device, outFd);
        }

        auto& passFence = slot.completionFences.at(pass);
        passFence = Core::Fence(vk.device);

        auto& secondStep = slot.cmdBuffers2.at(pass);
        secondStep = Core::CommandBuffer(vk.device, vk.commandPool);
        secondStep.begin();

        for (size_t stage = 0; stage < 7; stage++) {
            this->gamma.at(stage).Dispatch(secondStep, this->frameIdx, pass);
            if (stage >= 4)
                this->delta.at(stage - 4).Dispatch(secondStep, this->frameIdx, pass);
        }
        this->generate.Dispatch(secondStep, this->frameIdx, pass);

        secondStep.end();

        // only signal the output semaphore when external sync is in use
        std::vector<Core::Semaphore> signals;
        if (externalSync)
            signals.push_back(passDone);
        secondStep.submit(vk.device.getComputeQueue(), passFence,
            { firstStepDone }, std::nullopt,
            signals, std::nullopt);
    }

    this->frameIdx++;
}

View file

@ -0,0 +1,96 @@
#include "lsfg_3_1p.hpp"
#include "context.hpp"
#include "core/commandpool.hpp"
#include "core/descriptorpool.hpp"
#include "core/instance.hpp"
#include "pool/shaderpool.hpp"
#include "common/exception.hpp"
#include "common/utils.hpp"
#include <vulkan/vulkan_core.h>
#include <cstdint>
#include <optional>
#include <cstdlib>
#include <ctime>
#include <functional>
#include <string>
#include <unordered_map>
#include <vector>
using namespace LSFG;
using namespace LSFG_3_1P;
namespace {
// Library-wide state, created by initialize() and released by finalize().
std::optional<Core::Instance> instance;
std::optional<Vulkan> device;
// Live contexts, keyed by the identifier handed out by createContext().
std::unordered_map<int32_t, Context> contexts;
}
///
/// Create the global Vulkan instance, device and resource pools.
///
/// Does nothing if the library is already initialized. If any step fails,
/// everything created so far is released again before the exception is
/// propagated, so that a later call to initialize() can retry instead of
/// being rejected as "already initialized".
///
/// @param deviceUUID The UUID of the Vulkan device to use.
/// @param isHdr Whether the images are in HDR format.
/// @param flowScale Internal flow scale factor.
/// @param generationCount Number of frames to generate.
/// @param loader Function to load shader source code by name.
///
/// @throws LSFG::vulkan_error if Vulkan objects fail to initialize.
///
void LSFG_3_1P::initialize(uint64_t deviceUUID,
        bool isHdr, float flowScale, uint64_t generationCount,
        const std::function<std::vector<uint8_t>(const std::string&)>& loader) {
    if (instance.has_value() || device.has_value())
        return;

    try {
        instance.emplace();
        device.emplace(Vulkan {
            .device{*instance, deviceUUID},
            .generationCount = generationCount,
            .flowScale = flowScale,
            .isHdr = isHdr
        });
        contexts = std::unordered_map<int32_t, Context>();

        device->commandPool = Core::CommandPool(device->device);
        device->descriptorPool = Core::DescriptorPool(device->device);

        device->resources = Pool::ResourcePool(device->isHdr, device->flowScale);
        device->shaders = Pool::ShaderPool(loader);
    } catch (...) {
        // roll back the partial initialization (same order as finalize()),
        // otherwise the early return above would block every retry
        contexts.clear();
        device.reset();
        instance.reset();
        throw;
    }

    // seed the generator used for context identifiers
    std::srand(static_cast<uint32_t>(std::time(nullptr)));
}
///
/// Create a context and register it under a fresh identifier.
///
/// @param in0 File descriptor for the first input image.
/// @param in1 File descriptor for the second input image.
/// @param outN File descriptor for each output image.
/// @param extent The size of the images.
/// @param format The format of the images.
/// @return Identifier of the new context; never equal to that of another live context.
///
/// @throws LSFG::vulkan_error if the library is not initialized or the context cannot be created.
///
int32_t LSFG_3_1P::createContext(
        int in0, int in1, const std::vector<int>& outN,
        VkExtent2D extent, VkFormat format) {
    if (!instance.has_value() || !device.has_value())
        throw LSFG::vulkan_error(VK_ERROR_INITIALIZATION_FAILED, "LSFG not initialized");

    // emplace() does not insert when the key already exists, so a colliding
    // random id would silently drop the new context and hand the caller the
    // id of a different one; draw again until the id is unused
    int32_t id = std::rand();
    while (contexts.find(id) != contexts.end())
        id = std::rand();

    contexts.emplace(id, Context(*device, in0, in1, outN, extent, format));
    return id;
}
void LSFG_3_1P::presentContext(int32_t id, int inSem, const std::vector<int>& outSem) {
if (!instance.has_value() || !device.has_value())
throw LSFG::vulkan_error(VK_ERROR_INITIALIZATION_FAILED, "LSFG not initialized");
auto it = contexts.find(id);
if (it == contexts.end())
throw LSFG::vulkan_error(VK_ERROR_UNKNOWN, "Context not found");
it->second.present(*device, inSem, outSem);
}
///
/// Destroy a context once the device has finished all outstanding work.
///
/// @throws LSFG::vulkan_error if the library is not initialized
/// (VK_ERROR_INITIALIZATION_FAILED) or the identifier is unknown (VK_ERROR_DEVICE_LOST).
///
void LSFG_3_1P::deleteContext(int32_t id) {
    const bool initialized = instance.has_value() && device.has_value();
    if (!initialized)
        throw LSFG::vulkan_error(VK_ERROR_INITIALIZATION_FAILED, "LSFG not initialized");

    const auto entry = contexts.find(id);
    if (entry == contexts.end())
        throw LSFG::vulkan_error(VK_ERROR_DEVICE_LOST, "No such context");

    // the context's resources may still be in use by submitted work
    vkDeviceWaitIdle(device->device.handle());
    contexts.erase(entry);
}
///
/// Tear down all contexts, then the device and the instance.
///
/// Does nothing unless both the instance and the device exist.
///
void LSFG_3_1P::finalize() {
    const bool initialized = instance.has_value() && device.has_value();
    if (!initialized)
        return;

    // let all submitted work finish before anything is destroyed
    vkDeviceWaitIdle(device->device.handle());

    // contexts first, then the device they were created on, then the instance
    contexts.clear();
    device.reset();
    instance.reset();
}

View file

@ -0,0 +1,139 @@
#include "shaders/alpha.hpp"
#include "common/utils.hpp"
#include "core/commandbuffer.hpp"
#include "core/image.hpp"
#include <vulkan/vulkan_core.h>
#include <utility>
#include <cstddef>
#include <cstdint>
using namespace LSFG::Shaders;
///
/// Create all resources of the alpha shaderchain and bind them together.
///
/// The chain runs four passes: input -> tempImgs1 -> tempImgs2 -> tempImgs3
/// -> outImgs, where the last pass has one descriptor set per output image set.
///
Alpha::Alpha(Vulkan& vk, Core::Image inImg) : inImg(std::move(inImg)) {
    // shader modules, fetched in pass order
    this->shaderModules.at(0) = vk.shaders.getShader(vk.device, "alpha[0]",
        { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
          { 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
          { 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } });
    this->shaderModules.at(1) = vk.shaders.getShader(vk.device, "alpha[1]",
        { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
          { 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
          { 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } });
    this->shaderModules.at(2) = vk.shaders.getShader(vk.device, "alpha[2]",
        { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
          { 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
          { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } });
    this->shaderModules.at(3) = vk.shaders.getShader(vk.device, "alpha[3]",
        { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
          { 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
          { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } });

    // matching pipelines
    this->pipelines.at(0) = vk.shaders.getPipeline(vk.device, "alpha[0]");
    this->pipelines.at(1) = vk.shaders.getPipeline(vk.device, "alpha[1]");
    this->pipelines.at(2) = vk.shaders.getPipeline(vk.device, "alpha[2]");
    this->pipelines.at(3) = vk.shaders.getPipeline(vk.device, "alpha[3]");

    this->sampler = vk.resources.getSampler(vk.device);

    // descriptor sets: one each for passes one to three ...
    for (size_t pass = 0; pass < 3; pass++) {
        this->descriptorSets.at(pass) =
            Core::DescriptorSet(vk.device, vk.descriptorPool, this->shaderModules.at(pass));
    }
    // ... and three for the last pass, one per output image set
    for (size_t set = 0; set < 3; set++) {
        this->lastDescriptorSet.at(set) =
            Core::DescriptorSet(vk.device, vk.descriptorPool, this->shaderModules.at(3));
    }

    // intermediate images live at half and quarter of the input extent,
    // each halving rounded up
    const auto halve = [](const VkExtent2D& full) {
        return VkExtent2D{
            .width = (full.width + 1) >> 1,
            .height = (full.height + 1) >> 1
        };
    };
    const VkExtent2D halfExtent = halve(this->inImg.getExtent());
    const VkExtent2D quarterExtent = halve(halfExtent);

    for (size_t img = 0; img < 2; img++) {
        this->tempImgs1.at(img) = Core::Image(vk.device, halfExtent);
        this->tempImgs2.at(img) = Core::Image(vk.device, halfExtent);
    }
    for (size_t img = 0; img < 4; img++) {
        this->tempImgs3.at(img) = Core::Image(vk.device, quarterExtent);
        for (size_t set = 0; set < 3; set++)
            this->outImgs.at(set).at(img) = Core::Image(vk.device, quarterExtent);
    }

    // bind: each pass samples what the previous pass stored
    this->descriptorSets.at(0).update(vk.device)
        .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->sampler)
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg)
        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
        .build();
    this->descriptorSets.at(1).update(vk.device)
        .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->sampler)
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
        .build();
    this->descriptorSets.at(2).update(vk.device)
        .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->sampler)
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs3)
        .build();
    for (size_t set = 0; set < 3; set++) {
        this->lastDescriptorSet.at(set).update(vk.device)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->sampler)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs3)
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs.at(set))
            .build();
    }
}
///
/// Record the four alpha passes into a command buffer.
///
/// Passes one and two are dispatched over the half extent, passes three and
/// four over the quarter extent. The last pass writes the output image set
/// selected by frameCount % 3.
///
void Alpha::Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount) {
    // dispatch size along one axis: pixels / 8, rounded up
    const auto groupsFor = [](uint32_t pixels) -> uint32_t {
        return (pixels + 7) >> 3;
    };
    const uint64_t outSet = frameCount % 3;

    // first pass
    const auto halfExtent = this->tempImgs1.at(0).getExtent();
    const uint32_t halfGroupsX = groupsFor(halfExtent.width);
    const uint32_t halfGroupsY = groupsFor(halfExtent.height);

    Utils::BarrierBuilder(buf)
        .addW2R(this->inImg)
        .addR2W(this->tempImgs1)
        .build();

    this->pipelines.at(0).bind(buf);
    this->descriptorSets.at(0).bind(buf, this->pipelines.at(0));
    buf.dispatch(halfGroupsX, halfGroupsY, 1);

    // second pass
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs1)
        .addR2W(this->tempImgs2)
        .build();

    this->pipelines.at(1).bind(buf);
    this->descriptorSets.at(1).bind(buf, this->pipelines.at(1));
    buf.dispatch(halfGroupsX, halfGroupsY, 1);

    // third pass
    const auto quarterExtent = this->tempImgs3.at(0).getExtent();
    const uint32_t quarterGroupsX = groupsFor(quarterExtent.width);
    const uint32_t quarterGroupsY = groupsFor(quarterExtent.height);

    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs2)
        .addR2W(this->tempImgs3)
        .build();

    this->pipelines.at(2).bind(buf);
    this->descriptorSets.at(2).bind(buf, this->pipelines.at(2));
    buf.dispatch(quarterGroupsX, quarterGroupsY, 1);

    // fourth pass
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs3)
        .addR2W(this->outImgs.at(outSet))
        .build();

    this->pipelines.at(3).bind(buf);
    this->lastDescriptorSet.at(outSet).bind(buf, this->pipelines.at(3));
    buf.dispatch(quarterGroupsX, quarterGroupsY, 1);
}

View file

@ -0,0 +1,161 @@
#include "shaders/beta.hpp"
#include "common/utils.hpp"
#include "core/commandbuffer.hpp"
#include "core/image.hpp"
#include <vulkan/vulkan_core.h>
#include <array>
#include <utility>
#include <cstddef>
#include <cstdint>
using namespace LSFG::Shaders;
// Set up the beta shader chain: shader modules, pipelines, samplers, descriptor
// sets, the uniform buffer, the intermediate images and the six output images.
//
// vk     - shared Vulkan state (device, descriptor pool, shader/resource caches)
// inImgs - three sets of four input images; taken over by this object
Beta::Beta(Vulkan& vk, std::array<std::array<Core::Image, 4>, 3> inImgs)
        : inImgs(std::move(inImgs)) {
    // create resources
    this->shaderModules = {{
        vk.shaders.getShader(vk.device, "beta[0]",
            { { 1,  VK_DESCRIPTOR_TYPE_SAMPLER },
              { 12, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 2,  VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        vk.shaders.getShader(vk.device, "beta[1]",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        vk.shaders.getShader(vk.device, "beta[2]",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        vk.shaders.getShader(vk.device, "beta[3]",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        vk.shaders.getShader(vk.device, "beta[4]",
            { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
              { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 6, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } })
    }};
    this->pipelines = {{
        vk.shaders.getPipeline(vk.device, "beta[0]"),
        vk.shaders.getPipeline(vk.device, "beta[1]"),
        vk.shaders.getPipeline(vk.device, "beta[2]"),
        vk.shaders.getPipeline(vk.device, "beta[3]"),
        vk.shaders.getPipeline(vk.device, "beta[4]")
    }};
    this->samplers.at(0) = vk.resources.getSampler(vk.device);
    this->samplers.at(1) = vk.resources.getSampler(vk.device,
        VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, VK_COMPARE_OP_NEVER, true);

    // the first shader gets one descriptor set per input slot ...
    for (size_t slot = 0; slot < 3; slot++) {
        this->firstDescriptorSet.at(slot) =
            Core::DescriptorSet(vk.device, vk.descriptorPool, this->shaderModules.at(0));
    }
    // ... the remaining four shaders get a single set each
    for (size_t moduleIdx = 1; moduleIdx < 5; moduleIdx++) {
        this->descriptorSets.at(moduleIdx - 1) =
            Core::DescriptorSet(vk.device, vk.descriptorPool, this->shaderModules.at(moduleIdx));
    }
    this->buffer = vk.resources.getBuffer(vk.device, 0.5F);

    // create internal images/outputs
    const VkExtent2D extent = this->inImgs.at(0).at(0).getExtent();
    for (size_t idx = 0; idx < 2; idx++) {
        this->tempImgs1.at(idx) = Core::Image(vk.device, extent);
        this->tempImgs2.at(idx) = Core::Image(vk.device, extent);
    }
    // each output level halves the previous one
    // NOTE(review): for small inputs the shift can reach 0 - confirm this cannot happen
    for (size_t level = 0; level < 6; level++) {
        this->outImgs.at(level) = Core::Image(vk.device,
            { extent.width >> level, extent.height >> level },
            VK_FORMAT_R8_UNORM);
    }

    // hook up shaders
    for (size_t slot = 0; slot < 3; slot++) {
        // the three input sets are bound rotated by the slot index
        const size_t firstIdx = (slot + 1) % 3;
        const size_t secondIdx = (slot + 2) % 3;
        const size_t thirdIdx = slot % 3;
        this->firstDescriptorSet.at(slot).update(vk.device)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(1))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs.at(firstIdx))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs.at(secondIdx))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs.at(thirdIdx))
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
            .build();
    }
    // passes two to four ping-pong between tempImgs1 and tempImgs2
    this->descriptorSets.at(0).update(vk.device)
        .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
        .build();
    this->descriptorSets.at(1).update(vk.device)
        .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
        .build();
    this->descriptorSets.at(2).update(vk.device)
        .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
        .build();
    // the final pass reads tempImgs2 and writes all six outputs
    this->descriptorSets.at(3).update(vk.device)
        .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, this->buffer)
        .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
        .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
        .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs)
        .build();
}
// Record the five beta compute passes into the given command buffer.
//
// The first pass reads the input images using the descriptor set selected by
// frameCount % 3; passes two to four ping-pong between tempImgs1 and tempImgs2;
// the fifth pass writes outImgs using 32-wide workgroup tiling.
void Beta::Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount) {
    // number of `tile`-wide workgroups needed to cover `size`
    const auto groupsFor = [](uint32_t size, uint32_t tile) -> uint32_t {
        return (size + tile - 1) / tile;
    };
    // bind the pipeline and its descriptor set, then record the dispatch
    const auto run = [&buf](auto& pipeline, auto& set, uint32_t groupsX, uint32_t groupsY) {
        pipeline.bind(buf);
        set.bind(buf, pipeline);
        buf.dispatch(groupsX, groupsY, 1);
    };

    const auto size = this->tempImgs1.at(0).getExtent();
    const uint32_t groupsX8 = groupsFor(size.width, 8);
    const uint32_t groupsY8 = groupsFor(size.height, 8);

    // first pass
    Utils::BarrierBuilder(buf)
        .addW2R(this->inImgs.at(0))
        .addW2R(this->inImgs.at(1))
        .addW2R(this->inImgs.at(2))
        .addR2W(this->tempImgs1)
        .build();
    run(this->pipelines.at(0), this->firstDescriptorSet.at(frameCount % 3), groupsX8, groupsY8);

    // second pass
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs1)
        .addR2W(this->tempImgs2)
        .build();
    run(this->pipelines.at(1), this->descriptorSets.at(0), groupsX8, groupsY8);

    // third pass
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs2)
        .addR2W(this->tempImgs1)
        .build();
    run(this->pipelines.at(2), this->descriptorSets.at(1), groupsX8, groupsY8);

    // fourth pass
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs1)
        .addR2W(this->tempImgs2)
        .build();
    run(this->pipelines.at(3), this->descriptorSets.at(2), groupsX8, groupsY8);

    // fifth pass (same extent, but covered with 32-wide groups)
    const uint32_t groupsX32 = groupsFor(size.width, 32);
    const uint32_t groupsY32 = groupsFor(size.height, 32);
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs2)
        .addR2W(this->outImgs)
        .build();
    run(this->pipelines.at(4), this->descriptorSets.at(3), groupsX32, groupsY32);
}

View file

@ -0,0 +1,340 @@
#include "shaders/delta.hpp"
#include "common/utils.hpp"
#include "core/commandbuffer.hpp"
#include "core/image.hpp"
#include <vulkan/vulkan_core.h>
#include <array>
#include <optional>
#include <utility>
#include <cstddef>
#include <cstdint>
using namespace LSFG::Shaders;
// Set up the delta shader chain: ten shader modules/pipelines, three samplers,
// the intermediate and output images, and one group of descriptor sets (plus a
// uniform buffer) per generated frame (vk.generationCount).
//
// vk       - shared Vulkan state (device, descriptor pool, shader/resource caches)
// inImgs1  - three sets of four input images
// inImg2   - additional single input image
// optImg1, optImg2, optImg3 - optional input images; an empty optImg1 is
//            signalled to the uniform buffer via the last getBuffer argument
Delta::Delta(Vulkan& vk, std::array<std::array<Core::Image, 4>, 3> inImgs1,
        Core::Image inImg2,
        std::optional<Core::Image> optImg1,
        std::optional<Core::Image> optImg2,
        std::optional<Core::Image> optImg3)
        : inImgs1(std::move(inImgs1)), inImg2(std::move(inImg2)),
          optImg1(std::move(optImg1)), optImg2(std::move(optImg2)),
          optImg3(std::move(optImg3)) {
    // create resources
    this->shaderModules = {{
        vk.shaders.getShader(vk.device, "delta[0]",
            { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
              { 2, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 9, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 3, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        vk.shaders.getShader(vk.device, "delta[1]",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 3, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        vk.shaders.getShader(vk.device, "delta[2]",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        vk.shaders.getShader(vk.device, "delta[3]",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        vk.shaders.getShader(vk.device, "delta[4]",
            { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
              { 2, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 6, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        vk.shaders.getShader(vk.device, "delta[5]",
            { { 1,  VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
              { 2,  VK_DESCRIPTOR_TYPE_SAMPLER },
              { 10, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 2,  VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        vk.shaders.getShader(vk.device, "delta[6]",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        vk.shaders.getShader(vk.device, "delta[7]",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        vk.shaders.getShader(vk.device, "delta[8]",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 2, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 2, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        vk.shaders.getShader(vk.device, "delta[9]",
            { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
              { 2, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 3, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } })
    }};
    this->pipelines = {{
        vk.shaders.getPipeline(vk.device, "delta[0]"),
        vk.shaders.getPipeline(vk.device, "delta[1]"),
        vk.shaders.getPipeline(vk.device, "delta[2]"),
        vk.shaders.getPipeline(vk.device, "delta[3]"),
        vk.shaders.getPipeline(vk.device, "delta[4]"),
        vk.shaders.getPipeline(vk.device, "delta[5]"),
        vk.shaders.getPipeline(vk.device, "delta[6]"),
        vk.shaders.getPipeline(vk.device, "delta[7]"),
        vk.shaders.getPipeline(vk.device, "delta[8]"),
        vk.shaders.getPipeline(vk.device, "delta[9]")
    }};
    this->samplers.at(0) = vk.resources.getSampler(vk.device);
    this->samplers.at(1) = vk.resources.getSampler(vk.device,
        VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, VK_COMPARE_OP_NEVER, true);
    this->samplers.at(2) = vk.resources.getSampler(vk.device,
        VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_COMPARE_OP_ALWAYS, false);

    // create internal images/outputs
    const VkExtent2D extent = this->inImgs1.at(0).at(0).getExtent();
    for (size_t idx = 0; idx < 4; idx++) {
        this->tempImgs1.at(idx) = Core::Image(vk.device, extent);
        this->tempImgs2.at(idx) = Core::Image(vk.device, extent);
    }
    this->outImg1 = Core::Image(vk.device,
        { extent.width, extent.height },
        VK_FORMAT_R16G16B16A16_SFLOAT);
    this->outImg2 = Core::Image(vk.device,
        { extent.width, extent.height },
        VK_FORMAT_R16G16B16A16_SFLOAT);

    // allocate a descriptor set laid out for the given shader module
    const auto newSet = [&](size_t moduleIdx) {
        return Core::DescriptorSet(vk.device, vk.descriptorPool,
            this->shaderModules.at(moduleIdx));
    };

    // hook up shaders (one group of descriptor sets per generated frame)
    for (size_t passIdx = 0; passIdx < vk.generationCount; passIdx++) {
        auto& pass = this->passes.emplace_back();

        // evenly spaced value in (0, 1) for this generated frame
        const float fraction =
            static_cast<float>(passIdx + 1) / static_cast<float>(vk.generationCount + 1);
        pass.buffer = vk.resources.getBuffer(vk.device,
            fraction,
            false, !this->optImg1.has_value());

        // first shader: one set per input slot
        for (size_t slot = 0; slot < 3; slot++) {
            pass.firstDescriptorSet.at(slot) = newSet(0);
            pass.firstDescriptorSet.at(slot).update(vk.device)
                .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, pass.buffer)
                .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(1))
                .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(2))
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs1.at((slot + 2) % 3))
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs1.at(slot % 3))
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg1)
                .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(0))
                .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(1))
                .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(2))
                .build();
        }

        // second shader
        pass.descriptorSets.at(0) = newSet(1);
        pass.descriptorSets.at(0).update(vk.device)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(0))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(1))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(2))
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
            .build();

        // third shader
        pass.descriptorSets.at(1) = newSet(2);
        pass.descriptorSets.at(1).update(vk.device)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
            .build();

        // fourth shader
        pass.descriptorSets.at(2) = newSet(3);
        pass.descriptorSets.at(2).update(vk.device)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
            .build();

        // fifth shader (writes outImg1)
        pass.descriptorSets.at(3) = newSet(4);
        pass.descriptorSets.at(3).update(vk.device)
            .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, pass.buffer)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(2))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg1)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg2)
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImg1)
            .build();

        // sixth shader: again one set per input slot
        for (size_t slot = 0; slot < 3; slot++) {
            pass.sixthDescriptorSet.at(slot) = newSet(5);
            pass.sixthDescriptorSet.at(slot).update(vk.device)
                .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, pass.buffer)
                .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(1))
                .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(2))
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs1.at((slot + 2) % 3))
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs1.at(slot % 3))
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg1)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg2)
                .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2.at(0))
                .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2.at(1))
                .build();
        }

        // seventh shader
        pass.descriptorSets.at(4) = newSet(6);
        pass.descriptorSets.at(4).update(vk.device)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2.at(0))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2.at(1))
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(0))
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(1))
            .build();

        // eighth shader
        pass.descriptorSets.at(5) = newSet(7);
        pass.descriptorSets.at(5).update(vk.device)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(0))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(1))
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2.at(0))
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2.at(1))
            .build();

        // ninth shader
        pass.descriptorSets.at(6) = newSet(8);
        pass.descriptorSets.at(6).update(vk.device)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2.at(0))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2.at(1))
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(0))
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(1))
            .build();

        // tenth shader (writes outImg2)
        pass.descriptorSets.at(7) = newSet(9);
        pass.descriptorSets.at(7).update(vk.device)
            .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, pass.buffer)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(2))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(0))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(1))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg3)
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImg2)
            .build();
    }
}
// Record the ten delta compute shaders for one generated frame.
//
// buf        - command buffer to record into
// frameCount - selects (modulo 3) the input image sets and the per-slot
//              descriptor sets of the first and sixth shader
// pass_idx   - index into this->passes (bounds-checked via at())
//
// Shaders one to five end in outImg1, shaders six to ten end in outImg2.
// All dispatches use the tempImgs1 extent with 8-wide workgroups.
void Delta::Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount, uint64_t pass_idx) {
    auto& pass = this->passes.at(pass_idx);

    const auto size = this->tempImgs1.at(0).getExtent();
    const uint32_t groupsX = (size.width + 7) / 8;
    const uint32_t groupsY = (size.height + 7) / 8;

    // bind the pipeline and its descriptor set, then record the dispatch
    const auto run = [&buf, groupsX, groupsY](auto& pipeline, auto& set) {
        pipeline.bind(buf);
        set.bind(buf, pipeline);
        buf.dispatch(groupsX, groupsY, 1);
    };

    // indices of the two input image sets used this frame
    const uint64_t slot = frameCount % 3;
    const uint64_t slotPlus2 = (frameCount + 2) % 3;

    // first shader
    Utils::BarrierBuilder(buf)
        .addW2R(this->inImgs1.at(slotPlus2))
        .addW2R(this->inImgs1.at(slot))
        .addW2R(this->optImg1)
        .addR2W(this->tempImgs1.at(0))
        .addR2W(this->tempImgs1.at(1))
        .addR2W(this->tempImgs1.at(2))
        .build();
    run(this->pipelines.at(0), pass.firstDescriptorSet.at(slot));

    // second shader
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs1.at(0))
        .addW2R(this->tempImgs1.at(1))
        .addW2R(this->tempImgs1.at(2))
        .addR2W(this->tempImgs2)
        .build();
    run(this->pipelines.at(1), pass.descriptorSets.at(0));

    // third shader
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs2)
        .addR2W(this->tempImgs1)
        .build();
    run(this->pipelines.at(2), pass.descriptorSets.at(1));

    // fourth shader
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs1)
        .addR2W(this->tempImgs2)
        .build();
    run(this->pipelines.at(3), pass.descriptorSets.at(2));

    // fifth shader
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs2)
        .addW2R(this->optImg1)
        .addW2R(this->inImg2)
        .addR2W(this->outImg1)
        .build();
    run(this->pipelines.at(4), pass.descriptorSets.at(3));

    // sixth shader
    Utils::BarrierBuilder(buf)
        .addW2R(this->inImgs1.at(slotPlus2))
        .addW2R(this->inImgs1.at(slot))
        .addW2R(this->optImg1)
        .addW2R(this->optImg2)
        .addR2W(this->tempImgs2.at(0))
        .addR2W(this->tempImgs2.at(1))
        .build();
    run(this->pipelines.at(5), pass.sixthDescriptorSet.at(slot));

    // seventh shader
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs2.at(0))
        .addW2R(this->tempImgs2.at(1))
        .addR2W(this->tempImgs1.at(0))
        .addR2W(this->tempImgs1.at(1))
        .build();
    run(this->pipelines.at(6), pass.descriptorSets.at(4));

    // eighth shader
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs1.at(0))
        .addW2R(this->tempImgs1.at(1))
        .addR2W(this->tempImgs2.at(0))
        .addR2W(this->tempImgs2.at(1))
        .build();
    run(this->pipelines.at(7), pass.descriptorSets.at(5));

    // ninth shader
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs2.at(0))
        .addW2R(this->tempImgs2.at(1))
        .addW2R(this->optImg3)
        .addR2W(this->tempImgs1.at(0))
        .addR2W(this->tempImgs1.at(1))
        .build();
    run(this->pipelines.at(8), pass.descriptorSets.at(6));

    // tenth shader
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs1.at(0))
        .addW2R(this->tempImgs1.at(1))
        .addW2R(this->optImg3)
        .addR2W(this->outImg2)
        .build();
    run(this->pipelines.at(9), pass.descriptorSets.at(7));
}

View file

@ -0,0 +1,192 @@
#include "shaders/gamma.hpp"
#include "common/utils.hpp"
#include "core/commandbuffer.hpp"
#include "core/image.hpp"
#include <vulkan/vulkan_core.h>
#include <array>
#include <optional>
#include <utility>
#include <cstddef>
#include <cstdint>
using namespace LSFG::Shaders;
// Set up the gamma shader chain: five shader modules/pipelines, three samplers,
// the intermediate images, the output image, and one group of descriptor sets
// (plus a uniform buffer) per generated frame (vk.generationCount).
//
// vk      - shared Vulkan state (device, descriptor pool, shader/resource caches)
// inImgs1 - three sets of four input images
// inImg2  - additional single input image
// optImg  - optional input image; whether it is empty is forwarded to getBuffer
Gamma::Gamma(Vulkan& vk, std::array<std::array<Core::Image, 4>, 3> inImgs1,
        Core::Image inImg2,
        std::optional<Core::Image> optImg)
        : inImgs1(std::move(inImgs1)), inImg2(std::move(inImg2)),
          optImg(std::move(optImg)) {
    // create resources
    this->shaderModules = {{
        vk.shaders.getShader(vk.device, "gamma[0]",
            { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
              { 2, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 9, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 3, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        vk.shaders.getShader(vk.device, "gamma[1]",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 3, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        vk.shaders.getShader(vk.device, "gamma[2]",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        vk.shaders.getShader(vk.device, "gamma[3]",
            { { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 4, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 4, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }),
        vk.shaders.getShader(vk.device, "gamma[4]",
            { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
              { 2, VK_DESCRIPTOR_TYPE_SAMPLER },
              { 6, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
              { 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } })
    }};
    this->pipelines = {{
        vk.shaders.getPipeline(vk.device, "gamma[0]"),
        vk.shaders.getPipeline(vk.device, "gamma[1]"),
        vk.shaders.getPipeline(vk.device, "gamma[2]"),
        vk.shaders.getPipeline(vk.device, "gamma[3]"),
        vk.shaders.getPipeline(vk.device, "gamma[4]")
    }};
    this->samplers.at(0) = vk.resources.getSampler(vk.device);
    this->samplers.at(1) = vk.resources.getSampler(vk.device,
        VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, VK_COMPARE_OP_NEVER, true);
    this->samplers.at(2) = vk.resources.getSampler(vk.device,
        VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_COMPARE_OP_ALWAYS, false);

    // create internal images/outputs
    const VkExtent2D extent = this->inImgs1.at(0).at(0).getExtent();
    for (size_t idx = 0; idx < 4; idx++) {
        this->tempImgs1.at(idx) = Core::Image(vk.device, extent);
        this->tempImgs2.at(idx) = Core::Image(vk.device, extent);
    }
    this->outImg = Core::Image(vk.device,
        { extent.width, extent.height },
        VK_FORMAT_R16G16B16A16_SFLOAT);

    // allocate a descriptor set laid out for the given shader module
    const auto newSet = [&](size_t moduleIdx) {
        return Core::DescriptorSet(vk.device, vk.descriptorPool,
            this->shaderModules.at(moduleIdx));
    };

    // hook up shaders (one group of descriptor sets per generated frame)
    for (size_t passIdx = 0; passIdx < vk.generationCount; passIdx++) {
        auto& pass = this->passes.emplace_back();

        // evenly spaced value in (0, 1) for this generated frame
        const float fraction =
            static_cast<float>(passIdx + 1) / static_cast<float>(vk.generationCount + 1);
        pass.buffer = vk.resources.getBuffer(vk.device,
            fraction,
            !this->optImg.has_value());

        // first shader: one set per input slot
        for (size_t slot = 0; slot < 3; slot++) {
            pass.firstDescriptorSet.at(slot) = newSet(0);
            pass.firstDescriptorSet.at(slot).update(vk.device)
                .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, pass.buffer)
                .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(1))
                .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(2))
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs1.at((slot + 2) % 3))
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImgs1.at(slot % 3))
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg)
                .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(0))
                .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(1))
                .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1.at(2))
                .build();
        }

        // second shader
        pass.descriptorSets.at(0) = newSet(1);
        pass.descriptorSets.at(0).update(vk.device)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(0))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(1))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1.at(2))
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
            .build();

        // third shader
        pass.descriptorSets.at(1) = newSet(2);
        pass.descriptorSets.at(1).update(vk.device)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs1)
            .build();

        // fourth shader
        pass.descriptorSets.at(2) = newSet(3);
        pass.descriptorSets.at(2).update(vk.device)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs1)
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->tempImgs2)
            .build();

        // fifth shader (writes outImg)
        pass.descriptorSets.at(3) = newSet(4);
        pass.descriptorSets.at(3).update(vk.device)
            .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, pass.buffer)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(0))
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers.at(2))
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->tempImgs2)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->optImg)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg2)
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImg)
            .build();
    }
}
// Record the five gamma compute shaders for one generated frame.
//
// buf        - command buffer to record into
// frameCount - selects (modulo 3) the input image sets and the first
//              shader's descriptor set
// pass_idx   - index into this->passes (bounds-checked via at())
//
// All dispatches use the tempImgs1 extent with 8-wide workgroups; the last
// shader writes outImg.
void Gamma::Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount, uint64_t pass_idx) {
    auto& pass = this->passes.at(pass_idx);

    const auto size = this->tempImgs1.at(0).getExtent();
    const uint32_t groupsX = (size.width + 7) / 8;
    const uint32_t groupsY = (size.height + 7) / 8;

    // bind the pipeline and its descriptor set, then record the dispatch
    const auto run = [&buf, groupsX, groupsY](auto& pipeline, auto& set) {
        pipeline.bind(buf);
        set.bind(buf, pipeline);
        buf.dispatch(groupsX, groupsY, 1);
    };

    // indices of the two input image sets used this frame
    const uint64_t slot = frameCount % 3;
    const uint64_t slotPlus2 = (frameCount + 2) % 3;

    // first shader
    Utils::BarrierBuilder(buf)
        .addW2R(this->inImgs1.at(slotPlus2))
        .addW2R(this->inImgs1.at(slot))
        .addW2R(this->optImg)
        .addR2W(this->tempImgs1.at(0))
        .addR2W(this->tempImgs1.at(1))
        .addR2W(this->tempImgs1.at(2))
        .build();
    run(this->pipelines.at(0), pass.firstDescriptorSet.at(slot));

    // second shader
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs1.at(0))
        .addW2R(this->tempImgs1.at(1))
        .addW2R(this->tempImgs1.at(2))
        .addR2W(this->tempImgs2)
        .build();
    run(this->pipelines.at(1), pass.descriptorSets.at(0));

    // third shader
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs2)
        .addR2W(this->tempImgs1)
        .build();
    run(this->pipelines.at(2), pass.descriptorSets.at(1));

    // fourth shader
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs1)
        .addR2W(this->tempImgs2)
        .build();
    run(this->pipelines.at(3), pass.descriptorSets.at(2));

    // fifth shader
    Utils::BarrierBuilder(buf)
        .addW2R(this->tempImgs2)
        .addW2R(this->optImg)
        .addW2R(this->inImg2)
        .addR2W(this->outImg)
        .build();
    run(this->pipelines.at(4), pass.descriptorSets.at(3));
}

View file

@ -0,0 +1,82 @@
#include "shaders/generate.hpp"
#include "common/utils.hpp"
#include "core/commandbuffer.hpp"
#include "core/image.hpp"
#include <vulkan/vulkan_core.h>
#include <vector>
#include <utility>
#include <cstddef>
#include <cstdint>
using namespace LSFG::Shaders;
// Set up the generate shader: shader module, pipeline, two samplers, one
// output image per generated frame, and two descriptor sets per generated
// frame (the two sets differ only in the order of inImg1/inImg2).
//
// vk            - shared Vulkan state (device, descriptor pool, caches)
// inImg1..5     - the five sampled input images
// fds           - one value per output image, passed to the image constructor;
//                 when empty, -1 is passed instead
// format        - format of the output images
Generate::Generate(Vulkan& vk,
        Core::Image inImg1, Core::Image inImg2,
        Core::Image inImg3, Core::Image inImg4, Core::Image inImg5,
        const std::vector<int>& fds, VkFormat format)
        : inImg1(std::move(inImg1)), inImg2(std::move(inImg2)),
          inImg3(std::move(inImg3)), inImg4(std::move(inImg4)),
          inImg5(std::move(inImg5)) {
    // create resources
    this->shaderModule = vk.shaders.getShader(vk.device, "generate",
        { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
          { 2, VK_DESCRIPTOR_TYPE_SAMPLER },
          { 5, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
          { 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } });
    this->pipeline = vk.shaders.getPipeline(vk.device, "generate");
    this->samplers.at(0) = vk.resources.getSampler(vk.device);
    this->samplers.at(1) = vk.resources.getSampler(vk.device,
        VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_COMPARE_OP_ALWAYS);

    // create internal images/outputs
    const VkExtent2D extent = this->inImg1.getExtent();
    for (size_t outIdx = 0; outIdx < vk.generationCount; outIdx++) {
        // fall back to -1 when the caller supplied no fds
        const int fd = fds.empty() ? -1 : fds.at(outIdx);
        this->outImgs.emplace_back(vk.device, extent, format,
            VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
            VK_IMAGE_ASPECT_COLOR_BIT, fd);
    }

    // hook up shaders
    for (size_t passIdx = 0; passIdx < vk.generationCount; passIdx++) {
        auto& pass = this->passes.emplace_back();
        pass.buffer = vk.resources.getBuffer(vk.device,
            static_cast<float>(passIdx + 1) / static_cast<float>(vk.generationCount + 1));

        for (size_t variant = 0; variant < 2; variant++) {
            // variant 0 binds (inImg2, inImg1), variant 1 binds (inImg1, inImg2)
            auto& firstImg = (variant == 0) ? this->inImg2 : this->inImg1;
            auto& secondImg = (variant == 0) ? this->inImg1 : this->inImg2;

            pass.descriptorSet.at(variant) = Core::DescriptorSet(vk.device, vk.descriptorPool,
                this->shaderModule);
            pass.descriptorSet.at(variant).update(vk.device)
                .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, pass.buffer)
                .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->samplers)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, firstImg)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, secondImg)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg3)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg4)
                .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, this->inImg5)
                .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs.at(passIdx))
                .build();
        }
    }
}
// Record the generate compute shader for one generated frame.
//
// buf        - command buffer to record into
// frameCount - selects (modulo 2) which of the pass's two descriptor sets is bound
// pass_idx   - index into this->passes and this->outImgs (bounds-checked via at())
//
// The dispatch covers the inImg1 extent with 16-wide workgroups.
void Generate::Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount, uint64_t pass_idx) {
    auto& pass = this->passes.at(pass_idx);

    // first pass
    const auto size = this->inImg1.getExtent();
    const uint32_t groupsX = (size.width + 15) / 16;
    const uint32_t groupsY = (size.height + 15) / 16;

    Utils::BarrierBuilder(buf)
        .addW2R(this->inImg1)
        .addW2R(this->inImg2)
        .addW2R(this->inImg3)
        .addW2R(this->inImg4)
        .addW2R(this->inImg5)
        .addR2W(this->outImgs.at(pass_idx))
        .build();

    auto& set = pass.descriptorSet.at(frameCount % 2);
    this->pipeline.bind(buf);
    set.bind(buf, this->pipeline);
    buf.dispatch(groupsX, groupsY, 1);
}

View file

@ -0,0 +1,65 @@
#include "shaders/mipmaps.hpp"
#include "common/utils.hpp"
#include "core/image.hpp"
#include "core/commandbuffer.hpp"
#include <vulkan/vulkan_core.h>
#include <utility>
#include <cstddef>
#include <cstdint>
using namespace LSFG::Shaders;
// Set up the mipmaps shader: shader module, pipeline, uniform buffer, sampler,
// two descriptor sets (one per input image) and seven output images.
//
// vk               - shared Vulkan state; vk.flowScale divides the input extent
// inImg_0, inImg_1 - the two input images, bound to descriptor set 0 and 1
Mipmaps::Mipmaps(Vulkan& vk,
        Core::Image inImg_0, Core::Image inImg_1)
        : inImg_0(std::move(inImg_0)), inImg_1(std::move(inImg_1)) {
    // create resources
    this->shaderModule = vk.shaders.getShader(vk.device, "mipmaps",
        { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER },
          { 1, VK_DESCRIPTOR_TYPE_SAMPLER },
          { 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE },
          { 7, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } });
    this->pipeline = vk.shaders.getPipeline(vk.device, "mipmaps");
    this->buffer = vk.resources.getBuffer(vk.device);
    this->sampler = vk.resources.getSampler(vk.device);
    for (size_t setIdx = 0; setIdx < 2; setIdx++) {
        this->descriptorSets.at(setIdx) =
            Core::DescriptorSet(vk.device, vk.descriptorPool, this->shaderModule);
    }

    // create outputs: level 0 is the input extent divided by the flow scale
    // (truncated), every further level is halved again
    const auto sourceExtent = this->inImg_0.getExtent();
    const VkExtent2D flowExtent{
        .width = static_cast<uint32_t>(
            static_cast<float>(sourceExtent.width) / vk.flowScale),
        .height = static_cast<uint32_t>(
            static_cast<float>(sourceExtent.height) / vk.flowScale)
    };
    // NOTE(review): for small extents the shift can reach 0 - confirm this cannot happen
    for (size_t level = 0; level < 7; level++) {
        this->outImgs.at(level) = Core::Image(vk.device,
            { flowExtent.width >> level, flowExtent.height >> level },
            VK_FORMAT_R8_UNORM);
    }

    // hook up shaders
    for (size_t setIdx = 0; setIdx < 2; setIdx++) {
        // set 0 samples inImg_0, set 1 samples inImg_1
        auto& source = (setIdx == 0) ? this->inImg_0 : this->inImg_1;
        this->descriptorSets.at(setIdx).update(vk.device)
            .add(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, this->buffer)
            .add(VK_DESCRIPTOR_TYPE_SAMPLER, this->sampler)
            .add(VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, source)
            .add(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, this->outImgs)
            .build();
    }
}
// Record the mipmaps compute shader into the given command buffer.
//
// frameCount selects (modulo 2) the input image and its descriptor set.
// The dispatch covers the extent of outImgs[0] with 64-wide workgroups.
void Mipmaps::Dispatch(const Core::CommandBuffer& buf, uint64_t frameCount) {
    // even frames read inImg_0, odd frames read inImg_1
    const uint64_t parity = frameCount % 2;
    auto& source = (parity == 0) ? this->inImg_0 : this->inImg_1;

    // first pass
    const auto size = this->outImgs.at(0).getExtent();
    const uint32_t groupsX = (size.width + 63) / 64;
    const uint32_t groupsY = (size.height + 63) / 64;

    Utils::BarrierBuilder(buf)
        .addW2R(source)
        .addR2W(this->outImgs)
        .build();

    auto& set = this->descriptorSets.at(parity);
    this->pipeline.bind(buf);
    set.bind(buf, this->pipeline);
    buf.dispatch(groupsX, groupsY, 1);
}

View file

@ -8,7 +8,8 @@
#include "common/exception.hpp"
#include <vulkan/vulkan_core.h>
#include <lsfg.hpp>
#include <lsfg_3_1.hpp>
#include <lsfg_3_1p.hpp>
#include <algorithm>
#include <cstdint>
@ -33,6 +34,11 @@ LsContext::LsContext(const Hooks::DeviceInfo& info, VkSwapchainKHR swapchain,
? *lsfgHdrStr == '1'
: false;
const char* lsfgPerfModeStr = getenv("LSFG_PERF_MODE");
const bool perfMode = lsfgPerfModeStr
? *lsfgPerfModeStr == '1'
: false;
// we could take the format from the swapchain,
// but honestly this is safer.
const VkFormat format = isHdr
@ -72,11 +78,21 @@ LsContext::LsContext(const Hooks::DeviceInfo& info, VkSwapchainKHR swapchain,
i, out_n_fds.at(i));
}
auto* lsfgInitialize = LSFG_3_1::initialize;
auto* lsfgCreateContext = LSFG_3_1::createContext;
auto* lsfgDeleteContext = LSFG_3_1::deleteContext;
if (perfMode) {
Log::debug("context", "Using performance mode");
this->isPerfMode = true;
lsfgInitialize = LSFG_3_1P::initialize;
lsfgCreateContext = LSFG_3_1P::createContext;
lsfgDeleteContext = LSFG_3_1P::deleteContext;
}
// initialize lsfg
Log::debug("context", "(entering LSFG initialization)");
setenv("DISABLE_LSFG", "1", 1); // NOLINT
Extract::extractShaders();
LSFG_3_1::initialize(
lsfgInitialize(
Utils::getDeviceUUID(info.physicalDevice),
isHdr, 1.0F / flowScale, info.frameGen,
[](const std::string& name) {
@ -91,12 +107,12 @@ LsContext::LsContext(const Hooks::DeviceInfo& info, VkSwapchainKHR swapchain,
// create lsfg context
Log::debug("context", "(entering LSFG context creation)");
this->lsfgCtxId = std::shared_ptr<int32_t>(
new int32_t(LSFG_3_1::createContext(frame_0_fd, frame_1_fd, out_n_fds,
new int32_t(lsfgCreateContext(frame_0_fd, frame_1_fd, out_n_fds,
extent, format)),
[](const int32_t* id) {
[lsfgDeleteContext = lsfgDeleteContext](const int32_t* id) {
Log::info("context",
"(entering LSFG context deletion with id: {})", *id);
LSFG_3_1::deleteContext(*id);
lsfgDeleteContext(*id);
Log::info("context",
"(exiting LSFG context deletion with id: {})", *id);
}
@ -157,9 +173,15 @@ VkResult LsContext::present(const Hooks::DeviceInfo& info, const void* pNext, Vk
Log::debug("context2",
"(entering LSFG present with id: {})", *this->lsfgCtxId);
LSFG_3_1::presentContext(*this->lsfgCtxId,
preCopySemaphoreFd,
renderSemaphoreFds);
if (this->isPerfMode) {
LSFG_3_1P::presentContext(*this->lsfgCtxId,
preCopySemaphoreFd,
renderSemaphoreFds);
} else {
LSFG_3_1::presentContext(*this->lsfgCtxId,
preCopySemaphoreFd,
renderSemaphoreFds);
}
Log::debug("context2",
"(exiting LSFG present with id: {})", *this->lsfgCtxId);

View file

@ -3,7 +3,8 @@
#include "utils/log.hpp"
#include <vulkan/vulkan_core.h>
#include <lsfg.hpp>
#include <lsfg_3_1.hpp>
#include <lsfg_3_1p.hpp>
#include <cstdint>
#include <chrono>
@ -34,6 +35,7 @@ namespace {
const char* lsfgMultiplier = std::getenv("LSFG_MULTIPLIER");
const char* lsfgExtentWidth = std::getenv("LSFG_EXTENT_WIDTH");
const char* lsfgExtentHeight = std::getenv("LSFG_EXTENT_HEIGHT");
const char* lsfgPerfMode = std::getenv("LSFG_PERF_MODE");
const float flowScale = lsfgFlowScale
? std::stof(lsfgFlowScale) : 1.0F;
@ -45,6 +47,17 @@ namespace {
? static_cast<uint32_t>(std::stoul(lsfgExtentWidth)) : 1920;
const uint32_t height = lsfgExtentHeight
? static_cast<uint32_t>(std::stoul(lsfgExtentHeight)) : 1080;
const bool perfMode = lsfgPerfMode
? *lsfgPerfMode == '1' : false;
auto* lsfgInitialize = LSFG_3_1::initialize;
auto* lsfgCreateContext = LSFG_3_1::createContext;
auto* lsfgPresentContext = LSFG_3_1::presentContext;
if (perfMode) {
lsfgInitialize = LSFG_3_1P::initialize;
lsfgCreateContext = LSFG_3_1P::createContext;
lsfgPresentContext = LSFG_3_1P::presentContext;
}
Log::info("bench", "Running {}x benchmark with {}x{} extent and flow scale of {} {} HDR",
multiplier, width, height, flowScale, isHdr ? "with" : "without");
@ -55,7 +68,7 @@ namespace {
? std::stoull(std::string(lsfgDeviceUUID), nullptr, 16) : 0x1463ABAC;
Extract::extractShaders();
LSFG_3_1::initialize(
lsfgInitialize(
deviceUUID, // some magic number if not given
isHdr, 1.0F / flowScale, multiplier - 1,
[](const std::string& name) -> std::vector<uint8_t> {
@ -64,7 +77,7 @@ namespace {
return spirv;
}
);
const int32_t ctx = LSFG_3_1::createContext(-1, -1, {},
const int32_t ctx = lsfgCreateContext(-1, -1, {},
{ .width = width, .height = height },
isHdr ? VK_FORMAT_R16G16B16A16_SFLOAT : VK_FORMAT_R8G8B8A8_UNORM
);
@ -75,7 +88,7 @@ namespace {
const auto now = std::chrono::high_resolution_clock::now();
const uint64_t iterations = (8 * 500) + 1;
for (uint64_t count = 0; count < iterations; count++) {
LSFG_3_1::presentContext(ctx, -1, {});
lsfgPresentContext(ctx, -1, {});
if (count % 500 == 0)
Log::info("bench", "{:.2f}% done ({}/{})",