diff --git a/CMakeLists.txt b/CMakeLists.txt index 4b146ba..ea23caa 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,14 +6,14 @@ include(GNUInstallDirs) option(LSFGVK_BUILD_VK_LAYER "Build the Vulkan layer" ON) option(LSFGVK_BUILD_UI "Build the user interface" OFF) option(LSFGVK_BUILD_CLI "Build the command line interface" ON) -option(LSFGVK_INSTALL_DEVELOP "Install development libraries and headers" OFF) +option(LSFGVK_INSTALL_LIBRARIES "Install development libraries and headers" OFF) option(LSFGVK_INSTALL_XDG_FILES "Install the application icon and desktop files" OFF) set(LSFGVK_LAYER_LIBRARY_PATH liblsfg-vk-layer.so CACHE STRING "Change where Vulkan searches for the layer library") -option(LSFGVK_TESTING_RENDERDOC "Enable RenderDoc integration for testing purposes" OFF) # === READ HERE FOR BUILD OPTIONS === set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) +set(CMAKE_CXX_VISIBILITY_PRESET hidden) set(CMAKE_POSITION_INDEPENDENT_CODE ON) set(CMAKE_SKIP_RPATH ON) @@ -29,24 +29,20 @@ if(CMAKE_BUILD_TYPE STREQUAL "Debug") add_compile_options( # By default, enable all warnings -Weverything + -Wno-unknown-warning-option # Some warnings are incompatible with each other -Wno-pre-c++20-compat-pedantic -Wno-c++98-compat-pedantic -Wno-switch-default # Then there's code-style things I don't care about -Wno-missing-designated-field-initializers - -Wno-shadow + -Wno-unused-macros # And functional warning I don't care about either - -Wno-cast-function-type-strict -Wno-padded ) endif() endif() -if(LSFGVK_TESTING_RENDERDOC) - add_compile_definitions(LSFGVK_TESTING_RENDERDOC) -endif() - add_subdirectory(lsfg-vk-common) add_subdirectory(lsfg-vk-backend) if(LSFGVK_BUILD_VK_LAYER) diff --git a/docs/Building-From-Source.md b/docs/Building-From-Source.md index f997cbe..fc80bf3 100644 --- a/docs/Building-From-Source.md +++ b/docs/Building-From-Source.md @@ -76,7 +76,7 @@ However, lsfg-vk provides several CMake options to customize the build process: - 
`LSFGVK_BUILD_VK_LAYER`: Set to `On` to build the Vulkan layer (default is `On`). - `LSFGVK_BUILD_UI`: Set to `On` to build the user interface (default is `Off`). - `LSFGVK_BUILD_CLI`: Set to `On` to build the command-line interface (default is `On`). -- `LSFGVK_INSTALL_DEVELOP`: Set to `On` to install development files like headers and libraries (default is `Off`). +- `LSFGVK_INSTALL_LIBRARIES`: Set to `On` to install development files like headers and libraries (default is `Off`). - `LSFGVK_INSTALL_XDG_FILES`: Set to `On` to install XDG desktop files and icons (default is `Off`). - `LSFGVK_LAYER_LIBRARY_PATH`: Override the path to the Vulkan layer library (by default, Vulkan will search the systems library path). diff --git a/docs/Configuration.md b/docs/Configuration.md index a265f73..2354a96 100644 --- a/docs/Configuration.md +++ b/docs/Configuration.md @@ -10,7 +10,7 @@ Regardless of the method you choose, the concept of profiles remains the same. ### All Configuration Options Below is a list of all available **global** configuration options: -- **Path to Lossless Scaling / `dll`**: By default, lsfg-vk will search certain directories for Lossless Scaling. If you have Lossless Scaling installed in a custom location, you can specify the full path to the "Lossless.dll" file inside of Lossless Scaling here. +- **Path to Lossless Scaling / `dll`**: By default, lsfg-vk will search certain directories for Lossless Scaling. If you have Lossless Scaling installed in a custom location, you can specify the full path to the "lsfg-vk.dll" file inside of Lossless Scaling here. - **Allow half-precision / `allow_fp16`**: If enabled, this will allow lsfg-vk to take advantage of half-precision shader operations if supported by the GPU. This has a giant performance uplift on AMD GPUs, but does not affect NVIDIA GPUs (GTX 1000-series or older cards will actually see a big performance **decrease**). This option **does not** influence quality. 
(Default: `true`) Next is a list of all available **profile** configuration options: @@ -18,12 +18,12 @@ Next is a list of all available **profile** configuration options: - **Active In / `active_in`**: A list of 1) linux binary names, such as `mpv`, 2) windows executables, such as `GenshinImpact.exe` and 3) process names, such as `GameThread`. It is also possible to specify the last part of a path (e.g. `Ghostrunner2/Binaries/Win64/Ghostrunner2-Win64-Shipping.exe`). When a process matching one of these rules is detected, this profile will be activated. - **Multiplier / `multiplier`**: The frame generation multiplier. A value of 3 means that for every frame rendered by the application, lsfg-vk will generate 2 additional frames. (Default: `2`) - **Flow Scale / `flow_scale`**: The resolution scale at which the motion vectors are calculated. A lower value means better performance, but worse quality. (Default: `1.0`) -- **Performance Mode / `performance_mode`**: When enabled, a significantly lighter frame generation model is used. This has a minor quality impact, but greatly improves performance. +- **Performance Mode / `performance_mode`**: When enabled, a significantly lighter frame generation model is used. This has a minor quality impact, but greatly improves performance. (Default: `false`) - **Pacing Mode / `pacing`**: This option is explained in greater detail below. Supported values are **None / `none`**. - **GPU / `gpu`**: The GPU to use for frame generation. This MUST be the **same GPU** as the one being used by the application. **Dual GPU is NOT supported**. You can identify a GPU through its name (e.g. `NVIDIA GeForce RTX 3080`), uppercase-only ID (e.g. `0x10DE:0x2C02`) or PCI bus ID (e.g. `3:0.0`). If not specified, the primary GPU will be used, which may lead to issues. 
-The "Multiplier", "Flow Scale" and "Performance Mode" options can be **hot-reloaded**, meaning that changes to these options will take effect immediately without needing to restart the application. Options such as "Pacing Mode" or removal of the profile require a swapchain recreation, which usually means resizing or restarting the application. Any other change requires an application restart. +The "Multiplier", "Flow Scale" and "Performance Mode" options can be **hot-reloaded**, meaning that changes to these options will take effect immediately without needing to restart the application. Options such as "Pacing Mode" or removal of the profile require a swapchain recreation, which usually means resizing or restarting the application. Any other change requires an application restart. ### Pacing Modes @@ -45,7 +45,7 @@ The following environment variables affect lsfg-vk: - `LSFGVK_PROFILE`: Name of the profile to use. If set, this will override automatic profile detection. If you do not wish to use a configuration file, you can also set configuration options through environment variables. To do this, set `LSFGVK_ENV=1` and then any of the following variables: -- `LSFGVK_DLL_PATH`: Path to Lossless Scaling DLL. +- `LSFGVK_DLL_PATH`: Path to lsfg-vk DLL. - `LSFGVK_NO_FP16`: If set to `1`, half-precision will be disabled. - `LSFGVK_MULTIPLIER`: Frame generation multiplier. - `LSFGVK_FLOW_SCALE`: Flow scale value. 
diff --git a/lsfg-vk-backend/.clang-tidy b/lsfg-vk-backend/.clang-tidy index 2b3d77b..deeea07 100644 --- a/lsfg-vk-backend/.clang-tidy +++ b/lsfg-vk-backend/.clang-tidy @@ -23,8 +23,3 @@ Checks: - -cppcoreguidelines-macro-usage - -bugprone-easily-swappable-parameters - -portability-avoid-pragma-once -# Vulkan requires the use of reinterpret/const casts in many places -- -cppcoreguidelines-pro-type-reinterpret-cast -- -cppcoreguidelines-pro-type-const-cast -# We use namespace forward declarations -- -bugprone-forward-declaration-namespace diff --git a/lsfg-vk-backend/CMakeLists.txt b/lsfg-vk-backend/CMakeLists.txt index e14fc72..a92decf 100644 --- a/lsfg-vk-backend/CMakeLists.txt +++ b/lsfg-vk-backend/CMakeLists.txt @@ -1,33 +1,23 @@ set(BACKEND_SOURCES - "src/extraction/dll_reader.cpp" - "src/extraction/shader_registry.cpp" - "src/helpers/limits.cpp" - "src/helpers/managed_shader.cpp" - "src/helpers/utils.cpp" - "src/shaderchains/alpha0.cpp" - "src/shaderchains/alpha1.cpp" - "src/shaderchains/beta0.cpp" - "src/shaderchains/beta1.cpp" - "src/shaderchains/delta0.cpp" - "src/shaderchains/delta1.cpp" - "src/shaderchains/gamma0.cpp" - "src/shaderchains/gamma1.cpp" - "src/shaderchains/generate.cpp" - "src/shaderchains/mipmaps.cpp" + "src/modules/library/dll.cpp" + "src/modules/library.cpp" + "src/modules/pipeline.cpp" + "src/utility/pipelines.cpp" + "src/utility/vkhelper.cpp" "src/lsfgvk.cpp") add_library(lsfg-vk-backend STATIC ${BACKEND_SOURCES}) target_include_directories(lsfg-vk-backend - PUBLIC include) + PUBLIC include + PRIVATE src) -target_link_libraries(lsfg-vk-backend - PUBLIC lsfg-vk-common) +target_compile_options(lsfg-vk-backend PUBLIC + $<$<CXX_COMPILER_ID:Clang>:-fconstexpr-steps=4290000000> + $<$<CXX_COMPILER_ID:GNU>:-fconstexpr-ops-limit=4290000000> +) -set_target_properties(lsfg-vk-backend PROPERTIES - CXX_VISIBILITY_PRESET hidden) - -if(LSFGVK_INSTALL_DEVELOP) +if(LSFGVK_INSTALL_LIBRARIES) install(TARGETS lsfg-vk-backend ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") install(DIRECTORY 
"include/lsfg-vk-backend/" diff --git a/lsfg-vk-backend/include/lsfg-vk-backend/lsfgvk.hpp b/lsfg-vk-backend/include/lsfg-vk-backend/lsfgvk.hpp deleted file mode 100644 index eacb98e..0000000 --- a/lsfg-vk-backend/include/lsfg-vk-backend/lsfgvk.hpp +++ /dev/null @@ -1,143 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -namespace lsfgvk::backend { - - class [[gnu::visibility("default")]] ContextImpl; - class [[gnu::visibility("default")]] InstanceImpl; - - using Context = ContextImpl; - - /// - /// Primitive exception class that deliveres a detailed error message - /// - class [[gnu::visibility("default")]] error : public std::runtime_error { - public: - /// - /// Construct an error - /// - /// @param msg Error message. - /// @param inner Inner exception. - /// - explicit error(const std::string &msg, const std::exception &inner); - - /// - /// Construct an error - /// - /// @param msg Error message. - /// - explicit error(const std::string &msg); - - error(const error &) = default; - error &operator=(const error &) = default; - error(error &&) = default; - error &operator=(error &&) = default; - ~error() override; - }; - - /// Function type for picking a device based on its name and IDs - using DevicePicker = std::function ids, // (vendor ID, device ID) 0xXXXX format - const std::optional& pci // (bus:slot.func) if available, no padded zeros - )>; - - /// - /// Main entry point of the library - /// - class [[gnu::visibility("default")]] Instance { - public: - /// - /// Create a lsfg-vk instance - /// - /// @param devicePicker Function that picks a physical device based on some identifiers. - /// @param shaderDllPath Path to the Lossless.dll file to load shaders from. - /// @param allowLowPrecision Whether to load low-precision (FP16) shaders if supported. 
- /// - /// @throws backend::error on failure - /// - Instance( - const DevicePicker& devicePicker, - const std::filesystem::path& shaderDllPath, - bool allowLowPrecision - ); - - /// - /// Open a frame generation context. - /// - /// The VkFormat of the exchanged images is inferred from whether hdr is true or false: - /// - false: VK_FORMAT_R8G8B8A8_UNORM - /// - true: VK_FORMAT_R16G16B16A16_SFLOAT - /// - /// The application and library must keep track of the frame index. When the next frame - /// is ready, signal the syncFd with one increment (with the first trigger being 1). - /// Each generated frame will increment the semaphore by one: - /// - Application signals 1 -> Start generating with (curr, next) source images - /// - Library signals 1 -> First frame between (curr, next) is ready - /// - Library signals N -> N-th frame between (curr, next) is ready - /// - Application signals N+1 -> Start generating with (next, curr) source images - /// - /// @param sourceFds Pair of file descriptors for the source images alternated between. - /// @param destFds Vector with file descriptors to import output images from. - /// @param syncFd File descriptor for the timeline semaphore used for synchronization. - /// @param width Width of the images. - /// @param height Height of the images. - /// @param hdr Whether the images are HDR. - /// @param flow Motion flow factor. - /// @param perf Whether to enable performance mode. - /// - /// @throws backend::error on failure - /// - Context& openContext( - std::pair sourceFds, - const std::vector& destFds, - int syncFd, - uint32_t width, uint32_t height, - bool hdr, float flow, bool perf - ); - - /// - /// Schedule a new set of generated frames. - /// - /// @param context Context to use. - /// @throws backend::error on failure - /// - void scheduleFrames(Context& context); - - /// - /// Close a frame generation context - /// - /// @param context Context to close. 
- /// - void closeContext(const Context& context); - - // Non-copyable and non-movable - Instance(const Instance&) = delete; - Instance& operator=(const Instance&) = delete; - Instance(Instance&&) = delete; - Instance& operator=(Instance&&) = delete; - virtual ~Instance(); - private: - std::unique_ptr m_impl; - - std::vector> m_contexts; - }; - - /// - /// Make all lsfg-vk instances leaking. - /// This is to workaround a bug in the Vulkan loader, which - /// makes it impossible to destroy Vulkan instances and devices. - /// - void makeLeaking(); - -} diff --git a/lsfg-vk-backend/include/lsfg-vk/lsfgvk.hpp b/lsfg-vk-backend/include/lsfg-vk/lsfgvk.hpp new file mode 100644 index 0000000..b55ac86 --- /dev/null +++ b/lsfg-vk-backend/include/lsfg-vk/lsfgvk.hpp @@ -0,0 +1,152 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ + +#pragma once + +#include <cstdint> +#include <filesystem> +#include <memory> +#include <string> + +#ifdef LSFGVK_PRIV +#include <vulkan/vulkan_core.h> +#endif // LSFGVK_PRIV + +namespace lsfgvk { + + /// Forward declaration of implementation classes + namespace priv { + struct [[gnu::visibility("default")]] Instance; + struct [[gnu::visibility("default")]] Context; + } + + /// + /// Main entrypoint of the library + /// + class [[gnu::visibility("default")]] Instance { + friend class Context; + public: + /// + /// Create a lsfg-vk instance + /// + /// The device identifier may be one of: + /// - Device name (e.g. "NVIDIA GeForce RTX 5080") + /// - Vendor ID + Device ID in lowercase hexadecimal (e.g. "10de:2c02") + /// - PCI bus ID with padded zeroes (e.g. 
"0000:01:00.0") + /// + /// @param deviceId Device identifier (see above) + /// @param lsfgvkDllPath Path to the lsfg-vk DLL file + /// @param allowFP16 Whether to allow usage of fp16 shader variants + /// @throws std::runtime_error on failure + /// + Instance( + const std::string& deviceId, + const std::filesystem::path& lsfgvkDllPath, + bool allowFP16 + ); + +#ifdef LSFGVK_PRIV + /// Get the underlying Vulkan instance handle + /// @return Vulkan instance + [[nodiscard]] VkInstance _instance() const; + + /// Get the underlying Vulkan device handle + /// @return Vulkan device + [[nodiscard]] VkDevice _device() const; +#endif // LSFGVK_PRIV + + // Non-copyable, non-movable + Instance(const Instance&) = delete; + Instance& operator=(const Instance&) = delete; + Instance(Instance&&) = delete; + Instance& operator=(Instance&&) = delete; + ~Instance(); + private: + std::unique_ptr<priv::Instance> m_priv; + }; + + /// + /// File descriptors exported from a context; the user must close them after use. + /// + struct FileDescriptors { + /// + /// File descriptor for a Vulkan memory allocation containing + /// a 2D array of RGBA8 pixels with length 2 and optimal allocation. + /// + /// Starting at iteration 0, the next frame for which frames should be interpolated + /// in between should be placed in image `iteration % 2`. + /// + int sourceFd; + + /// + /// File descriptor for a Vulkan memory allocation containing a single RGBA8 + /// image into which each generated frame will be written. + /// + int destinationFd; + + /// + /// File descriptor for a timeline semaphore. When scheduling frames for generation, + /// a specific value is waited for and signaled on return. It is up to the user to ensure + /// the destination image is not overwritten before it is read. 
+ /// + int syncFd; + }; + + /// A context for generating frames + /// + class [[gnu::visibility("default")]] Context { + public: + /// + /// Create a frame generation context + /// + /// @param instance Parent instance + /// @param width Image width + /// @param height Image height + /// @param flowScale Flow estimation scale factor + /// @param performanceMode Whether to enable performance mode + /// @throws std::runtime_error on failure + /// + Context( + const Instance& instance, + uint32_t width, + uint32_t height, + float flowScale, + bool performanceMode + ); + + /// + /// Export the internal resources + /// + /// @return File descriptors for internal resources + /// @throws std::runtime_error on failure + /// + [[nodiscard]] FileDescriptors exportFds() const; + + /// + /// Dispatch frame generation + /// + /// Let `so - 1` be the current value of the timeline semaphore, starting at 0. + /// The user must signal `so` to start the generation of the next frame, after + /// which lsfg-vk will signal `so + 1`. The user must ensure the previously + /// generated frame is read before signaling the next one (at `so + 2` and so on). 
+ /// + /// @param total Total number of frames to generate + /// @throws std::runtime_error on failure + /// + void dispatch(uint32_t total); + + /// + /// Wait for the device to be idle + /// + void idle() const; + + // Non-copyable, non-movable + Context(const Context&) = delete; + Context& operator=(const Context&) = delete; + Context(Context&&) = delete; + Context& operator=(Context&&) = delete; + ~Context(); + private: + std::unique_ptr<priv::Context> m_priv; + }; + +} diff --git a/lsfg-vk-backend/src/extraction/dll_reader.hpp b/lsfg-vk-backend/src/extraction/dll_reader.hpp deleted file mode 100644 index a73e899..0000000 --- a/lsfg-vk-backend/src/extraction/dll_reader.hpp +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#pragma once - -#include -#include -#include -#include - -namespace lsfgvk::backend { - - /// extract all resources from a DLL file - /// @param dll path to the DLL file - /// @return map of resource IDs to their binary data - /// @throws ls::error on various failure points - std::unordered_map> extractResourcesFromDLL( - const std::filesystem::path& dll); - -} diff --git a/lsfg-vk-backend/src/extraction/shader_registry.cpp b/lsfg-vk-backend/src/extraction/shader_registry.cpp deleted file mode 100644 index b9c65e6..0000000 --- a/lsfg-vk-backend/src/extraction/shader_registry.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#include "shader_registry.hpp" -#include "lsfg-vk-common/helpers/errors.hpp" -#include "lsfg-vk-common/vulkan/shader.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include -#include -#include -#include -#include -#include - -using namespace lsfgvk; -using namespace lsfgvk::backend; - -namespace { - /// get the source code for a shader - const std::vector& getShaderSource(uint32_t id, bool fp16, bool perf, - const std::unordered_map>& resources) { - const size_t BASE_OFFSET = 49; - const size_t OFFSET_PERF = 23; - const size_t OFFSET_FP32 = 49; - - auto it = 
resources.find(BASE_OFFSET + id + - (perf ? OFFSET_PERF : 0) + - (fp16 ? 0 : OFFSET_FP32)); - if (it == resources.end()) - throw ls::error("unable to find shader with id: " + std::to_string(id)); - - return it->second; - } - /// patch the generate shader - void patchGenerateShader(std::vector& data, bool hdr) { -#pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wunknown-warning-option" -#pragma clang diagnostic ignored "-Wunsafe-buffer-usage-in-container" - auto* _ptr = data.data(); - const std::span words( - reinterpret_cast(_ptr), - data.size() / sizeof(uint32_t) - ); -#pragma clang diagnostic pop - - const uint16_t SpvOpCapability = 17; - const uint16_t SpvOpTypeImage = 25; - const uint32_t SpvCapabilityStorageImageWriteWithoutFormat = 56; - const uint32_t SpvCapabilityShader = 1; - const uint32_t SpvImageFormatRgba16f = 2; - const uint32_t SpvImageFormatRgba8 = 4; - - for (size_t i = 5; i < words.size();) { - const uint32_t& word = words[i]; // NOLINT ([]-usage) - const uint16_t wc = (word >> 16); - const uint16_t op = word & 0xFFFF; - - // remove write without format capability - if (op == SpvOpCapability && wc >= 2) { - uint32_t& cap = words[i + 1]; // NOLINT ([]-usage) - if (cap == SpvCapabilityStorageImageWriteWithoutFormat) - cap = SpvCapabilityShader; - } - - // patch format in image instructions - if (op == SpvOpTypeImage && wc >= 9) { - const uint32_t sampled = words[i + 7]; // NOLINT ([]-usage) - if (sampled == 2) - words[i + 8] = // NOLINT ([]-usage) - hdr ? SpvImageFormatRgba16f : SpvImageFormatRgba8; - } - - i += wc ? 
wc : 1; - } - } -} - -ShaderRegistry backend::buildShaderRegistry(const vk::Vulkan& vk, bool fp16, - const std::unordered_map>& resources) { - // patch the generate shader - std::vector generate_data = getShaderSource(256, fp16, false, resources); - std::vector generate_data_hdr = generate_data; - patchGenerateShader(generate_data, false); - patchGenerateShader(generate_data_hdr, true); - - // load all other shaders -#define SHADER(id, p1, p2, p3, p4) \ - vk::Shader(vk, getShaderSource(id, fp16, PERF, resources), \ - p1, p2, p3, p4) - - return { -#define PERF false - .mipmaps = SHADER(255, 1, 7, 1, 1), - .generate = vk::Shader(vk, generate_data, 5, 1, 1, 2), - .generate_hdr = vk::Shader(vk, generate_data_hdr, 5, 1, 1, 2), - .quality = { - .alpha = { - SHADER(267, 1, 2, 0, 1), - SHADER(268, 2, 2, 0, 1), - SHADER(269, 2, 4, 0, 1), - SHADER(270, 4, 4, 0, 1) - }, - .beta = { - SHADER(275, 12, 2, 0, 1), - SHADER(276, 2, 2, 0, 1), - SHADER(277, 2, 2, 0, 1), - SHADER(278, 2, 2, 0, 1), - SHADER(279, 2, 6, 1, 1) - }, - .gamma = { - SHADER(257, 9, 3, 1, 2), - SHADER(259, 3, 4, 0, 1), - SHADER(260, 4, 4, 0, 1), - SHADER(261, 4, 4, 0, 1), - SHADER(262, 6, 1, 1, 2) - }, - .delta = { - SHADER(257, 9, 3, 1, 2), - SHADER(263, 3, 4, 0, 1), - SHADER(264, 4, 4, 0, 1), - SHADER(265, 4, 4, 0, 1), - SHADER(266, 6, 1, 1, 2), - SHADER(258, 10, 2, 1, 2), - SHADER(271, 2, 2, 0, 1), - SHADER(272, 2, 2, 0, 1), - SHADER(273, 2, 2, 0, 1), - SHADER(274, 3, 1, 1, 2) - } - }, -#undef PERF -#define PERF true - .performance = { - .alpha = { - SHADER(267, 1, 1, 0, 1), - SHADER(268, 1, 1, 0, 1), - SHADER(269, 1, 2, 0, 1), - SHADER(270, 2, 2, 0, 1) - }, - .beta = { - SHADER(275, 6, 2, 0, 1), - SHADER(276, 2, 2, 0, 1), - SHADER(277, 2, 2, 0, 1), - SHADER(278, 2, 2, 0, 1), - SHADER(279, 2, 6, 1, 1) - }, - .gamma = { - SHADER(257, 5, 3, 1, 2), - SHADER(259, 3, 2, 0, 1), - SHADER(260, 2, 2, 0, 1), - SHADER(261, 2, 2, 0, 1), - SHADER(262, 4, 1, 1, 2) - }, - .delta = { - SHADER(257, 5, 3, 1, 2), - 
SHADER(263, 3, 2, 0, 1), - SHADER(264, 2, 2, 0, 1), - SHADER(265, 2, 2, 0, 1), - SHADER(266, 4, 1, 1, 2), - SHADER(258, 6, 1, 1, 2), - SHADER(271, 1, 1, 0, 1), - SHADER(272, 1, 1, 0, 1), - SHADER(273, 1, 1, 0, 1), - SHADER(274, 2, 1, 1, 2) - } - }, -#undef PERF - .is_fp16 = fp16 - }; - -#undef SHADER -} diff --git a/lsfg-vk-backend/src/extraction/shader_registry.hpp b/lsfg-vk-backend/src/extraction/shader_registry.hpp deleted file mode 100644 index e2dcee8..0000000 --- a/lsfg-vk-backend/src/extraction/shader_registry.hpp +++ /dev/null @@ -1,42 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#pragma once - -#include "lsfg-vk-common/vulkan/shader.hpp" - -#include -#include -#include -#include - -namespace lsfgvk::backend { - - /// shader collection struct - struct Shaders { - std::array alpha; - std::array beta; - std::array gamma; - std::array delta; - }; - - /// shader registry struct - struct ShaderRegistry { - vk::Shader mipmaps; - vk::Shader generate, generate_hdr; - Shaders quality; - Shaders performance; - - bool is_fp16; //!< whether the fp16 shader variants were loaded - }; - - /// build a shader registry from resources - /// @param vk Vulkan instance - /// @param fp16 whether to load fp16 variants - /// @param resources map of resource IDs to their binary data - /// @return constructed shader registry - /// @throws ls::error if shaders are missing - /// @throws vk::vulkan_error on Vulkan errors - ShaderRegistry buildShaderRegistry(const vk::Vulkan& vk, bool fp16, - const std::unordered_map>& resources); - -} diff --git a/lsfg-vk-backend/src/helpers/limits.cpp b/lsfg-vk-backend/src/helpers/limits.cpp deleted file mode 100644 index 32e5e00..0000000 --- a/lsfg-vk-backend/src/helpers/limits.cpp +++ /dev/null @@ -1,56 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#include "limits.hpp" - -#include "lsfg-vk-common/vulkan/descriptor_pool.hpp" - -#include -#include - -using namespace lsfgvk; -using namespace lsfgvk::backend; - -namespace { 
- const vk::Limits BASE_LIMITS{ - .sets = 51, - .uniform_buffers = 3, - .samplers = 51, - .sampled_images = 165, - .storage_images = 172 - }; - const vk::Limits BASE_LIMITS_PERF{ - .sampled_images = 91, - .storage_images = 102 - }; - const vk::Limits GEN_LIMITS{ - .sets = 93, - .uniform_buffers = 54, - .samplers = 147, - .sampled_images = 567, - .storage_images = 261 - }; - const vk::Limits GEN_LIMITS_PERF{ - .sampled_images = 339, - .storage_images = 183 - }; -} - -vk::Limits backend::calculateDescriptorPoolLimits(size_t count, bool perf) { - const auto m = static_cast(count); - - vk::Limits a{BASE_LIMITS}; - vk::Limits b{GEN_LIMITS}; - if (perf) { - a.sampled_images = BASE_LIMITS_PERF.sampled_images; - b.sampled_images = GEN_LIMITS_PERF.sampled_images; - a.storage_images = BASE_LIMITS_PERF.storage_images; - b.storage_images = GEN_LIMITS_PERF.storage_images; - } - - a.sets += b.sets * m; - a.uniform_buffers += b.uniform_buffers * m; - a.samplers += b.samplers * m; - a.sampled_images += b.sampled_images * m; - a.storage_images += b.storage_images * m; - return a; -} diff --git a/lsfg-vk-backend/src/helpers/limits.hpp b/lsfg-vk-backend/src/helpers/limits.hpp deleted file mode 100644 index 3647f54..0000000 --- a/lsfg-vk-backend/src/helpers/limits.hpp +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#pragma once - -#include "lsfg-vk-common/vulkan/descriptor_pool.hpp" - -#include - -namespace lsfgvk::backend { - /// calculate limits for descriptor pools - /// @param count number of images - /// @param perf whether performance mode is enabled - /// @return calculated limits - vk::Limits calculateDescriptorPoolLimits(size_t count, bool perf); -} diff --git a/lsfg-vk-backend/src/helpers/managed_shader.cpp b/lsfg-vk-backend/src/helpers/managed_shader.cpp deleted file mode 100644 index 0b35ad4..0000000 --- a/lsfg-vk-backend/src/helpers/managed_shader.cpp +++ /dev/null @@ -1,128 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - 
-#include "managed_shader.hpp" -#include "lsfg-vk-common/vulkan/buffer.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/descriptor_pool.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include "lsfg-vk-common/vulkan/sampler.hpp" -#include "lsfg-vk-common/vulkan/shader.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include -#include -#include -#include - -#include - -using namespace lsfgvk; -using namespace lsfgvk::backend; - -ManagedShaderBuilder& ManagedShaderBuilder::sampled(const vk::Image& image) { - this->sampledImages.push_back(std::ref(image)); - return *this; -} - -ManagedShaderBuilder& ManagedShaderBuilder::sampleds( - const std::vector& images, - size_t offset, size_t count) { - if (count == 0 || offset + count > images.size()) - count = images.size() - offset; - - for (size_t i = 0; i < count; ++i) - this->sampledImages.push_back(std::ref(images.at(offset + i))); - return *this; -} - - -ManagedShaderBuilder& ManagedShaderBuilder::storage(const vk::Image& image) { - this->storageImages.push_back(std::ref(image)); - return *this; -} - -ManagedShaderBuilder& ManagedShaderBuilder::storages( - const std::vector& images, - size_t offset, size_t count) { - if (count == 0 || offset + count > images.size()) - count = images.size() - offset; - - for (size_t i = 0; i < count; ++i) - this->storageImages.push_back(std::ref(images.at(offset + i))); - return *this; -} - -ManagedShaderBuilder& ManagedShaderBuilder::sampler(const vk::Sampler& sampler) { - this->imageSamplers.push_back(std::ref(sampler)); - return *this; -} - -ManagedShaderBuilder& ManagedShaderBuilder::samplers( - const std::vector& samplers) { - for (const auto& sampler : samplers) - this->imageSamplers.push_back(std::ref(sampler)); - return *this; -} - -ManagedShaderBuilder& ManagedShaderBuilder::buffer(const vk::Buffer& buffer) { - this->constantBuffers.push_back(std::ref(buffer)); - return *this; -} - -ManagedShader ManagedShaderBuilder::build(const 
vk::Vulkan& vk, - const vk::DescriptorPool& pool, const vk::Shader& shader) const { - std::vector barriers; - barriers.reserve(this->storageImages.size() + this->sampledImages.size()); - - for (const auto& img : this->sampledImages) - barriers.push_back({ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = img.get().handle(), - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .levelCount = 1, - .layerCount = 1 - } - }); - for (const auto& img : this->storageImages) - barriers.push_back({ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .srcAccessMask = VK_ACCESS_SHADER_READ_BIT, - .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, - .oldLayout = VK_IMAGE_LAYOUT_GENERAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = img.get().handle(), - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .levelCount = 1, - .layerCount = 1 - } - }); - - return { - std::ref(shader), - std::move(barriers), - vk::DescriptorSet(vk, pool, shader, - this->sampledImages, - this->storageImages, - this->imageSamplers, - this->constantBuffers) - }; -} - -void ManagedShader::dispatch(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, - VkExtent2D extent) const { - cmd.dispatch(vk, this->shader, - this->descriptorSet, - this->barriers, - extent.width, extent.height, 1 - ); -} diff --git a/lsfg-vk-backend/src/helpers/managed_shader.hpp b/lsfg-vk-backend/src/helpers/managed_shader.hpp deleted file mode 100644 index e0d673f..0000000 --- a/lsfg-vk-backend/src/helpers/managed_shader.hpp +++ /dev/null @@ -1,98 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#pragma once - 
-#include "lsfg-vk-common/helpers/pointers.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/descriptor_pool.hpp" -#include "lsfg-vk-common/vulkan/descriptor_set.hpp" -#include "lsfg-vk-common/vulkan/shader.hpp" - -#include -#include - -#include - -namespace lsfgvk::backend { - - /// managed shader handling dispatch and barriers - /// this class is NOT memory-safe - class ManagedShader { - friend class ManagedShaderBuilder; - public: - /// dispatch the managed shader - /// @param vk the vulkan instance - /// @param cmd command buffer to use - /// @param extent dispatch size - /// @throws ls::vulkan_error on failure - void dispatch(const vk::Vulkan& vk, - const vk::CommandBuffer& cmd, VkExtent2D extent) const; - private: - ls::R shader; - - std::vector barriers; - vk::DescriptorSet descriptorSet; - - // simple move constructor - ManagedShader(ls::R shader, - std::vector barriers, - vk::DescriptorSet descriptorSet) : - shader(shader), - barriers(std::move(barriers)), - descriptorSet(std::move(descriptorSet)) { - } - }; - - /// class for building managed shaders - /// this class is NOT memory-safe - class ManagedShaderBuilder { - public: - /// default constructor - ManagedShaderBuilder() = default; - - /// add a sampled image - /// @param image image to add - [[nodiscard]] ManagedShaderBuilder& sampled(const vk::Image& image); - /// add multiple sampled images - /// @param images images to add - /// @param offset offset into images - /// @param count number of images to add (0 = all) - [[nodiscard]] ManagedShaderBuilder& sampleds(const std::vector& images, - size_t offset = 0, size_t count = 0); - - /// add a storage image - /// @param image image to add - [[nodiscard]] ManagedShaderBuilder& storage(const vk::Image& image); - /// add multiple storage images - /// @param images images to add - /// @param offset offset into images - /// @param count number of images to add (0 = all) - [[nodiscard]] ManagedShaderBuilder& 
storages(const std::vector& images, - size_t offset = 0, size_t count = 0); - - /// add a sampler - /// @param sampler sampler to add - [[nodiscard]] ManagedShaderBuilder& sampler(const vk::Sampler& sampler); - /// add multiple samplers - /// @param samplers samplers to add - [[nodiscard]] ManagedShaderBuilder& samplers(const std::vector& samplers); - - /// add a buffer - /// @param buffer buffer to add - [[nodiscard]] ManagedShaderBuilder& buffer(const vk::Buffer& buffer); - - /// build the managed shader - /// @param vk the vulkan instance - /// @param pool the descriptor pool to use - /// @param shader the shader to use - /// @returns the built managed shader - [[nodiscard]] ManagedShader build(const vk::Vulkan& vk, - const vk::DescriptorPool& pool, const vk::Shader& shader) const; - private: - std::vector> sampledImages; - std::vector> storageImages; - std::vector> imageSamplers; - std::vector> constantBuffers; - }; - -} diff --git a/lsfg-vk-backend/src/helpers/utils.cpp b/lsfg-vk-backend/src/helpers/utils.cpp deleted file mode 100644 index 3c310cd..0000000 --- a/lsfg-vk-backend/src/helpers/utils.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#include "utils.hpp" - -#include -#include -#include -#include - -#include - -using namespace lsfgvk; -using namespace lsfgvk::backend; - -ConstantBuffer backend::getDefaultConstantBuffer( - size_t index, size_t total, - bool hdr, float invFlow) { - return ConstantBuffer { - .advancedColorKind = hdr ? 2U : 0U, - .hdrSupport = hdr ? 
1U : 0U, - .resolutionInvScale = invFlow, - .timestamp = static_cast(index + 1) / static_cast(total + 1), - .uiThreshold = 0.5F - }; -} - -VkExtent2D backend::shift_extent(VkExtent2D extent, uint32_t i) { - return VkExtent2D{ - .width = extent.width >> i, - .height = extent.height >> i - }; -} - -VkExtent2D backend::add_shift_extent(VkExtent2D extent, uint32_t a, uint32_t i) { - return VkExtent2D{ - .width = (extent.width + a) >> i, - .height = (extent.height + a) >> i - }; -} - -std::string backend::to_hex_id(uint32_t id) { - const std::array chars = std::to_array("0123456789ABCDEF"); - - std::string result = "0x"; - result += chars.at((id >> 12) & 0xF); - result += chars.at((id >> 8) & 0xF); - result += chars.at((id >> 4) & 0xF); - result += chars.at(id & 0xF); - return result; -} diff --git a/lsfg-vk-backend/src/helpers/utils.hpp b/lsfg-vk-backend/src/helpers/utils.hpp deleted file mode 100644 index 202ac0c..0000000 --- a/lsfg-vk-backend/src/helpers/utils.hpp +++ /dev/null @@ -1,82 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#pragma once - -#include "../extraction/shader_registry.hpp" -#include "lsfg-vk-common/helpers/pointers.hpp" -#include "lsfg-vk-common/vulkan/buffer.hpp" -#include "lsfg-vk-common/vulkan/descriptor_pool.hpp" -#include "lsfg-vk-common/vulkan/sampler.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include -#include -#include -#include -#include - -#include - -namespace lsfgvk::backend { - /// exposed context data - struct Ctx { - ls::R vk; // safe back reference - ls::R shaders; // safe back reference - - vk::DescriptorPool pool; - - vk::Buffer constantBuffer; - std::vector constantBuffers; - vk::Sampler bnbSampler; //!< border, no compare, black - vk::Sampler bnwSampler; //!< border, no compare, white - vk::Sampler eabSampler; //!< edge, always compare, black - - VkExtent2D sourceExtent; - VkExtent2D flowExtent; - - bool hdr; - float flow; - bool perf; - size_t count; - }; - - /// constant buffer used in shaders - struct 
ConstantBuffer { - std::array inputOffset; - uint32_t firstIter; - uint32_t firstIterS; - uint32_t advancedColorKind; - uint32_t hdrSupport; - float resolutionInvScale; - float timestamp; - float uiThreshold; - std::array pad; - }; - - /// get a prefilled constant buffer - /// @param index timestamp index - /// @param total total amount of images - /// @param hdr whether HDR is enabled - /// @param invFlow inverted flow scale value - /// @return prefilled constant buffer - ConstantBuffer getDefaultConstantBuffer( - size_t index, size_t total, - bool hdr, float invFlow - ); - - /// round down a VkExtent2D - /// @param extent the extent to shift - /// @param i the amount to shift by - /// @return the shifted extent - VkExtent2D shift_extent(VkExtent2D extent, uint32_t i); - - /// round up a VkExtent2D - /// @param extent the extent to shift - /// @param a the amount to add before shifting - /// @param i the amount to shift by - /// @return the shifted extent - VkExtent2D add_shift_extent(VkExtent2D extent, uint32_t a, uint32_t i); - - /// convert a device/vendor id into a hex string - std::string to_hex_id(uint32_t id); -} diff --git a/lsfg-vk-backend/src/lsfgvk.cpp b/lsfg-vk-backend/src/lsfgvk.cpp index c94cfe1..7bda40f 100644 --- a/lsfg-vk-backend/src/lsfgvk.cpp +++ b/lsfg-vk-backend/src/lsfgvk.cpp @@ -1,666 +1,241 @@ /* SPDX-License-Identifier: GPL-3.0-or-later */ -#include "lsfg-vk-backend/lsfgvk.hpp" -#include "extraction/dll_reader.hpp" -#include "extraction/shader_registry.hpp" -#include "helpers/limits.hpp" -#include "helpers/utils.hpp" -#include "lsfg-vk-common/helpers/errors.hpp" -#include "lsfg-vk-common/helpers/pointers.hpp" -#include "lsfg-vk-common/vulkan/buffer.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/fence.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include "lsfg-vk-common/vulkan/timeline_semaphore.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" -#include "shaderchains/alpha0.hpp" -#include 
"shaderchains/alpha1.hpp" -#include "shaderchains/beta0.hpp" -#include "shaderchains/beta1.hpp" -#include "shaderchains/delta0.hpp" -#include "shaderchains/delta1.hpp" -#include "shaderchains/gamma0.hpp" -#include "shaderchains/gamma1.hpp" -#include "shaderchains/generate.hpp" -#include "shaderchains/mipmaps.hpp" +#include "lsfgvk.hpp" +#include "modules/library.hpp" +#include "modules/pipeline.hpp" +#include "utility/pipelines.hpp" +#include "utility/vkhelper.hpp" -#include -#include -#include #include -#include -#include #include #include -#include #include -#include #include #include -#include #include -#include - -#include - -#ifdef LSFGVK_TESTING_RENDERDOC -#include -#include -#endif using namespace lsfgvk; -using namespace lsfgvk::backend; - -namespace lsfgvk::backend { - error::error(const std::string& msg, const std::exception& inner) - : std::runtime_error(msg + "\n- " + inner.what()) {} - error::error(const std::string& msg) - : std::runtime_error(msg) {} - error::~error() = default; - - /// instance class - class InstanceImpl { - public: - /// create an instance - /// (see lsfg-vk documentation) - InstanceImpl(vk::PhysicalDeviceSelector selectPhysicalDevice, - const std::filesystem::path& shaderDllPath, - bool allowLowPrecision); - - /// get the Vulkan instance - /// @return the Vulkan instance - [[nodiscard]] const auto& getVulkan() const { return this->vk; } - /// get the shader registry - /// @return the shader registry - [[nodiscard]] const auto& getShaderRegistry() const { return this->shaders; } -#ifdef LSFGVK_TESTING_RENDERDOC - /// get the RenderDoc API - /// @return the RenderDoc API - [[nodiscard]] const auto& getRenderDocAPI() const { return this->renderdoc; } -#endif - // Movable, non-copyable, custom destructor - InstanceImpl(const InstanceImpl&) = delete; - InstanceImpl& operator=(const InstanceImpl&) = delete; - InstanceImpl(InstanceImpl&&) = default; - InstanceImpl& operator=(InstanceImpl&&) = default; - ~InstanceImpl(); - private: - 
vk::Vulkan vk; - ShaderRegistry shaders; - -#ifdef LSFGVK_TESTING_RENDERDOC - std::optional renderdoc; -#endif - }; - - /// context class - class ContextImpl { - public: - /// create a context - /// (see lsfg-vk documentation) - ContextImpl(const InstanceImpl& instance, - std::pair sourceFds, const std::vector& destFds, int syncFd, - VkExtent2D extent, bool hdr, float flow, bool perf); - - /// schedule frames - /// (see lsfg-vk documentation) - void scheduleFrames(); - private: - std::pair sourceImages; - std::vector destImages; - vk::Image blackImage; - - vk::TimelineSemaphore syncSemaphore; // imported - vk::TimelineSemaphore prepassSemaphore; - size_t idx{1}; - size_t fidx{0}; // real frame index - - std::vector cmdbufs; - vk::Fence cmdbufFence; - - Ctx ctx; - - Mipmaps mipmaps; - std::array alpha0; - std::array alpha1; - Beta0 beta0; - Beta1 beta1; - struct Pass { - std::vector gamma0; - std::vector gamma1; - - std::vector delta0; - std::vector delta1; - ls::lazy generate; - }; - std::vector passes; - }; -} Instance::Instance( - const DevicePicker& devicePicker, - const std::filesystem::path& shaderDllPath, - bool allowLowPrecision) { - const auto selectFunc = [&devicePicker](const vk::VulkanInstanceFuncs funcs, - const std::vector& devices) { - for (const auto& device : devices) { - // check if the physical device supports VK_EXT_pci_bus_info - uint32_t ext_count{}; - funcs.EnumerateDeviceExtensionProperties(device, nullptr, &ext_count, VK_NULL_HANDLE); + const std::string& deviceId, + const std::filesystem::path& lsfgvkDllPath, + bool allowFP16 +) { + // Create Vulkan context + auto dld{std::make_unique()}; - std::vector extensions(ext_count); - funcs.EnumerateDeviceExtensionProperties(device, nullptr, &ext_count, extensions.data()); + auto instance{vkhelper::createInstance(*dld)}; + auto physdev{vkhelper::findPhysicalDevice(*dld, *instance, deviceId)}; - const bool has_pci_ext = std::ranges::find_if(extensions, - [](const VkExtensionProperties& ext) { - 
return std::string(std::to_array(ext.extensionName).data()) - == VK_EXT_PCI_BUS_INFO_EXTENSION_NAME; - }) != extensions.end(); + const uint32_t qfi{vkhelper::findComputeQueueFamilyIndex(*dld, physdev)}; + const bool fp16{allowFP16 && vkhelper::checkHalfPrecisionSupport(*dld, physdev)}; - // then fetch all available properties - VkPhysicalDevicePCIBusInfoPropertiesEXT pciInfo{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT - }; - VkPhysicalDeviceProperties2 props{ - .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2, - .pNext = has_pci_ext ? &pciInfo : nullptr - }; - funcs.GetPhysicalDeviceProperties2(device, &props); + auto [device, queue] = vkhelper::createDevice(*dld, physdev, qfi, fp16); - std::array devname = std::to_array(props.properties.deviceName); - devname.at(255) = '\0'; // ensure null-termination - - if (devicePicker( - std::string(devname.data()), - { backend::to_hex_id(props.properties.vendorID), - backend::to_hex_id(props.properties.deviceID) }, - has_pci_ext ? std::optional{ - std::to_string(pciInfo.pciBus) + ":" + - std::to_string(pciInfo.pciDevice) + "." 
+ - std::to_string(pciInfo.pciFunction) - } : std::nullopt - )) - return device; - } - - throw ls::vulkan_error("no suitable physical device found"); + // Construct instance + library::ShaderLibrary library{ + *dld, + *device, + fp16, + lsfgvkDllPath }; - this->m_impl = std::make_unique( - selectFunc, shaderDllPath, allowLowPrecision + this->m_priv = std::make_unique(priv::Instance { + .vk = { + .dld = std::move(dld), + .instance = std::move(instance), + .physdev = physdev, + .device = std::move(device), + .queue = queue, + .qfi = qfi, + .fp16 = fp16 + }, + .shaderLibrary = std::move(library) + }); +} + +Context::Context( + const Instance& instance, + uint32_t width, + uint32_t height, + float flowScale, + bool performanceMode +) { + const auto& vk{instance.m_priv->vk}; + + pipeline::Pipeline pipeline{ + *vk.dld, + *vk.device, + vk.physdev, + vk.queue, + vk.qfi, + instance.m_priv->shaderLibrary, + lsfgvk::getPipelineSignature(performanceMode), + { width, height }, + flowScale, + performanceMode, + false + }; + + this->m_priv = std::make_unique(priv::Context { + .instance = std::ref(*instance.m_priv), + .pipeline = std::move(pipeline), + .syncSemaphore = { vkhelper::createTimelineSemaphore(*vk.dld, *vk.device, true), 0 }, + .internalSemaphores = { vkhelper::createTimelineSemaphore(*vk.dld, *vk.device), 0 }, + .fence = vkhelper::createFence(*vk.dld, *vk.device), + }); +} + +FileDescriptors Context::exportFds() const { + const auto& vk{this->m_priv->instance.get().vk}; + const auto& pipeline{this->m_priv->pipeline}; + + return{ + .sourceFd = vkhelper::exportMemoryFd( + *vk.dld, *vk.device, + pipeline.getExternalInputs().front().memory + ), + .destinationFd = vkhelper::exportMemoryFd( + *vk.dld, *vk.device, + pipeline.getExternalOutputs().front().memory + ), + .syncFd = vkhelper::exportSemaphoreFd( + *vk.dld, *vk.device, + *this->m_priv->syncSemaphore.first + ) + }; +} + +void Context::dispatch(uint32_t total) { + auto& ctx{*this->m_priv}; + const auto& 
vk{ctx.instance.get().vk}; + + // Increment iteration counter after previous frame is completed + auto* mapped{ctx.pipeline.getMappedBuffer()}; + if (ctx.firstIteration) { + ctx.firstIteration = false; + mapped->iteration = 0; + } else { + if (vk.device->waitForFences(*ctx.fence, true, UINT64_MAX, *vk.dld) != vk::Result::eSuccess) + throw std::runtime_error("Unable to wait for completion of previous iteration"); + vk.device->resetFences(*ctx.fence, *vk.dld); + mapped->iteration++; + } + + const auto& cmdbufs{ctx.pipeline.getCmdbufs()}; + + // Dispatch pre-pass + auto& sync{ctx.syncSemaphore}; + sync.second++; + + auto& internal{ctx.internalSemaphores}; + internal.second++; + + vk::TimelineSemaphoreSubmitInfo timelineInfo{ + .waitSemaphoreValueCount = 1, + .pWaitSemaphoreValues = &sync.second, + .signalSemaphoreValueCount = 1, + .pSignalSemaphoreValues = &internal.second + }; + + const vk::PipelineStageFlags waitStage{vk::PipelineStageFlagBits::eTopOfPipe}; + vk.queue.submit( + {{ + .pNext = &timelineInfo, + .waitSemaphoreCount = 1, + .pWaitSemaphores = &*sync.first, + .pWaitDstStageMask = &waitStage, + .commandBufferCount = 1U, + .pCommandBuffers = &*cmdbufs.at(0), + .signalSemaphoreCount = 1, + .pSignalSemaphores = &*internal.first + }}, + nullptr, + *vk.dld ); -} -namespace { - /// find the cache file path - std::filesystem::path findCacheFilePath() { - const char* xdgCacheHome = std::getenv("XDG_CACHE_HOME"); - if (xdgCacheHome && *xdgCacheHome != '\0') - return std::filesystem::path(xdgCacheHome) / "lsfg-vk_pipeline_cache.bin"; + // Dispatch main passes + uint64_t prevInternal{}; + for (uint32_t i = 0; i < total; i++) { + const auto& transCmdbuf{ctx.pipeline.buildTransCmdbuf( + *vk.dld, *vk.device, + mapped->iteration, + i, total + )}; - const char* home = std::getenv("HOME"); - if (home && *home != '\0') - return std::filesystem::path(home) / ".cache" / "lsfg-vk_pipeline_cache.bin"; - - return{"/tmp/lsfg-vk_pipeline_cache.bin"}; - } - /// create a Vulkan 
instance - vk::Vulkan createVulkanInstance(vk::PhysicalDeviceSelector selectPhysicalDevice) { - try { - return{ - "lsfg-vk", vk::version{2, 0, 0}, - "lsfg-vk-engine", vk::version{2, 0, 0}, - selectPhysicalDevice, - false, std::nullopt, - findCacheFilePath() - }; - } catch (const std::exception& e) { - throw backend::error("Unable to initialize Vulkan", e); - } - } - /// build a shader registry - ShaderRegistry createShaderRegistry(vk::Vulkan& vk, - const std::filesystem::path& shaderDllPath, - bool allowLowPrecision) { - std::unordered_map> resources{}; - - try { - resources = backend::extractResourcesFromDLL(shaderDllPath); - } catch (const std::exception& e) { - throw backend::error("Unable to parse Lossless Scaling DLL", e); + // Transition command buffer to next timestamp + if (i == 0) { + prevInternal = internal.second; + timelineInfo.pWaitSemaphoreValues = &prevInternal; + } else { + sync.second++; + timelineInfo.pWaitSemaphoreValues = &sync.second; } - try { - return backend::buildShaderRegistry( - vk, allowLowPrecision && vk.supportsFP16(), - resources - ); - } catch (const std::exception& e) { - throw backend::error("Unable to build shader registry", e); - } - } -#ifdef LSFGVK_TESTING_RENDERDOC - /// load RenderDoc integration - std::optional loadRenderDocIntegration() { - void* module = dlopen("librenderdoc.so", RTLD_NOW | RTLD_NOLOAD); - if (!module) - return std::nullopt; + internal.second++; + timelineInfo.pSignalSemaphoreValues = &internal.second; - auto renderdocGetAPI = reinterpret_cast( - dlsym(module, "RENDERDOC_GetAPI")); - if (!renderdocGetAPI) - return std::nullopt; + vk.queue.submit( + {{ + .pNext = &timelineInfo, + .waitSemaphoreCount = 1, + .pWaitSemaphores = i == 0 ? 
&*internal.first : &*sync.first, + .pWaitDstStageMask = &waitStage, + .commandBufferCount = 1, + .pCommandBuffers = &transCmdbuf, + .signalSemaphoreCount = 1, + .pSignalSemaphores = &*internal.first + }}, + nullptr, + *vk.dld + ); - RENDERDOC_API_1_6_0* api{}; - renderdocGetAPI(eRENDERDOC_API_Version_1_6_0, reinterpret_cast(&api)); - if (!api) - return std::nullopt; + // Dispatch main pass + timelineInfo.pWaitSemaphoreValues = &internal.second; - return *api; - } -#endif -} + sync.second++; + timelineInfo.pSignalSemaphoreValues = &sync.second; -InstanceImpl::InstanceImpl(vk::PhysicalDeviceSelector selectPhysicalDevice, - const std::filesystem::path& shaderDllPath, - bool allowLowPrecision) - : vk(createVulkanInstance(selectPhysicalDevice)), - shaders(createShaderRegistry(this->vk, shaderDllPath, - allowLowPrecision && vk.supportsFP16())) { -#ifdef LSFGVK_TESTING_RENDERDOC - this->renderdoc = loadRenderDocIntegration(); -#endif - vk.persistPipelineCache(); // will silently fail -} - -Context& Instance::openContext(std::pair sourceFds, const std::vector& destFds, - int syncFd, uint32_t width, uint32_t height, - bool hdr, float flow, bool perf) { - const VkExtent2D extent{ width, height }; - return *this->m_contexts.emplace_back(std::make_unique(*this->m_impl, - sourceFds, destFds, syncFd, - extent, hdr, flow, perf - )).get(); -} - -namespace { - /// import source images - std::pair importImages(const vk::Vulkan& vk, - const std::pair& sourceFds, - VkExtent2D extent, VkFormat format) { - try { - return { - vk::Image(vk, extent, format, - VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, sourceFds.first), - vk::Image(vk, extent, format, - VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, sourceFds.second) - }; - } catch (const std::exception& e) { - throw backend::error("Unable to import destination images", e); - } - } - /// import destination images - std::vector importImages(const vk::Vulkan& vk, - const std::vector& destFds, - VkExtent2D extent, 
VkFormat format) { - try { - std::vector destImages; - destImages.reserve(destFds.size()); - - for (const auto& fd : destFds) - destImages.emplace_back(vk, extent, format, - VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, fd); - - return destImages; - } catch (const std::exception& e) { - throw backend::error("Unable to import destination images", e); - } - } - /// create a black image - vk::Image createBlackImage(const vk::Vulkan& vk) { - try { - return{vk, - { .width = 4, .height = 4 } - }; - } catch (const std::exception& e) { - throw backend::error("Unable to create black image", e); - } - } - /// import timeline semaphore - vk::TimelineSemaphore importTimelineSemaphore(const vk::Vulkan& vk, int syncFd) { - try { - return{vk, 0, syncFd}; - } catch (const std::exception& e) { - throw backend::error("Unable to import timeline semaphore", e); - } - } - /// create prepass semaphores - vk::TimelineSemaphore createPrepassSemaphore(const vk::Vulkan& vk) { - try { - return{vk, 0}; - } catch (const std::exception& e) { - throw backend::error("Unable to create prepass semaphore", e); - } - } - /// create command buffers - std::vector createCommandBuffers(const vk::Vulkan& vk, size_t count) { - try { - std::vector cmdbufs; - cmdbufs.reserve(count); - - for (size_t i = 0; i < count; ++i) - cmdbufs.emplace_back(vk); - - return cmdbufs; - } catch (const std::exception& e) { - throw backend::error("Unable to create command buffers", e); - } - } - /// create context data - Ctx createCtx(const InstanceImpl& instance, VkExtent2D extent, - bool hdr, float flow, bool perf, size_t count) { - const auto& vk = instance.getVulkan(); - const auto& shaders = instance.getShaderRegistry(); - - try { - std::vector constantBuffers{}; - constantBuffers.reserve(count); - - for (size_t i = 0; i < count; ++i) - constantBuffers.emplace_back(vk, - backend::getDefaultConstantBuffer( - i, count, - hdr, flow - ) - ); - - return { - .vk = std::ref(vk), - .shaders = std::ref(shaders), - 
.pool{vk, backend::calculateDescriptorPoolLimits(count, perf)}, - .constantBuffer{vk, backend::getDefaultConstantBuffer(0, 1, hdr, flow)}, - .constantBuffers{std::move(constantBuffers)}, - .bnbSampler{vk, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, VK_COMPARE_OP_NEVER, false}, - .bnwSampler{vk, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, VK_COMPARE_OP_NEVER, true}, - .eabSampler{vk, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_COMPARE_OP_ALWAYS, false}, - .sourceExtent = extent, - .flowExtent = VkExtent2D { - .width = static_cast(static_cast(extent.width) / flow), - .height = static_cast(static_cast(extent.height) / flow) - }, - .hdr = hdr, - .flow = flow, - .perf = perf, - .count = count - }; - } catch (const std::exception& e) { - throw backend::error("Unable to create context", e); - } - } -} - -ContextImpl::ContextImpl(const InstanceImpl& instance, - std::pair sourceFds, const std::vector& destFds, int syncFd, - VkExtent2D extent, bool hdr, float flow, bool perf) : - sourceImages(importImages(instance.getVulkan(), sourceFds, - extent, hdr ? VK_FORMAT_R16G16B16A16_SFLOAT : VK_FORMAT_R8G8B8A8_UNORM)), - destImages(importImages(instance.getVulkan(), destFds, - extent, hdr ? 
VK_FORMAT_R16G16B16A16_SFLOAT : VK_FORMAT_R8G8B8A8_UNORM)), - blackImage(createBlackImage(instance.getVulkan())), - syncSemaphore(importTimelineSemaphore(instance.getVulkan(), syncFd)), - prepassSemaphore(createPrepassSemaphore(instance.getVulkan())), - cmdbufs(createCommandBuffers(instance.getVulkan(), destFds.size() + 1)), - cmdbufFence(instance.getVulkan()), - ctx(createCtx(instance, extent, hdr, flow, perf, destFds.size())), - mipmaps(ctx, sourceImages), - alpha0{ - Alpha0(ctx, mipmaps.getImages().at(0)), - Alpha0(ctx, mipmaps.getImages().at(1)), - Alpha0(ctx, mipmaps.getImages().at(2)), - Alpha0(ctx, mipmaps.getImages().at(3)), - Alpha0(ctx, mipmaps.getImages().at(4)), - Alpha0(ctx, mipmaps.getImages().at(5)), - Alpha0(ctx, mipmaps.getImages().at(6)) - }, - alpha1{ - Alpha1(ctx, 3, alpha0.at(0).getImages()), - Alpha1(ctx, 2, alpha0.at(1).getImages()), - Alpha1(ctx, 2, alpha0.at(2).getImages()), - Alpha1(ctx, 2, alpha0.at(3).getImages()), - Alpha1(ctx, 2, alpha0.at(4).getImages()), - Alpha1(ctx, 2, alpha0.at(5).getImages()), - Alpha1(ctx, 2, alpha0.at(6).getImages()) - }, - beta0(ctx, alpha1.at(0).getImages()), - beta1(ctx, beta0.getImages()) { - // build main passes - for (size_t i = 0; i < destImages.size(); ++i) { - auto& pass = this->passes.emplace_back(); - - pass.gamma0.reserve(7); - pass.gamma1.reserve(7); - pass.delta0.reserve(3); - pass.delta1.reserve(3); - for (size_t j = 0; j < 7; j++) { - if (j == 0) { // first pass has no prior data - pass.gamma0.emplace_back(ctx, i, - this->alpha1.at(6 - j).getImages(), - this->blackImage - ); - pass.gamma1.emplace_back(ctx, i, - pass.gamma0.at(j).getImages(), - this->blackImage, - this->beta1.getImages().at(5) - ); - } else { // other passes use prior data - pass.gamma0.emplace_back(ctx, i, - this->alpha1.at(6 - j).getImages(), - pass.gamma1.at(j - 1).getImage() - ); - pass.gamma1.emplace_back(ctx, i, - pass.gamma0.at(j).getImages(), - pass.gamma1.at(j - 1).getImage(), - this->beta1.getImages().at(6 - j) - ); - } 
- - if (j == 4) { // first special pass has no prior data - pass.delta0.emplace_back(ctx, i, - this->alpha1.at(6 - j).getImages(), - this->blackImage, - pass.gamma1.at(j - 1).getImage() - ); - pass.delta1.emplace_back(ctx, i, - pass.delta0.at(j - 4).getImages0(), - pass.delta0.at(j - 4).getImages1(), - this->blackImage, - this->beta1.getImages().at(6 - j), - this->blackImage - ); - } else if (j > 4) { // further passes do - pass.delta0.emplace_back(ctx, i, - this->alpha1.at(6 - j).getImages(), - pass.delta1.at(j - 5).getImage0(), - pass.gamma1.at(j - 1).getImage() - ); - pass.delta1.emplace_back(ctx, i, - pass.delta0.at(j - 4).getImages0(), - pass.delta0.at(j - 4).getImages1(), - pass.delta1.at(j - 5).getImage0(), - this->beta1.getImages().at(6 - j), - pass.delta1.at(j - 5).getImage1() - ); - } - } - - pass.generate.emplace(ctx, i, - this->sourceImages, - pass.gamma1.at(6).getImage(), - pass.delta1.at(2).getImage0(), - pass.delta1.at(2).getImage1(), - this->destImages.at(i) + vk.queue.submit( + {{ + .pNext = &timelineInfo, + .waitSemaphoreCount = 1, + .pWaitSemaphores = &*internal.first, + .pWaitDstStageMask = &waitStage, + .commandBufferCount = 1, + .pCommandBuffers = &*cmdbufs.at(1), + .signalSemaphoreCount = 1, + .pSignalSemaphores = &*sync.first + }}, + i == (total - 1) ? 
*ctx.fence : nullptr, + *vk.dld ); } - - // initialize all images - std::vector images{}; - images.push_back(this->blackImage.handle()); - mipmaps.prepare(images); - for (size_t i = 0; i < 7; ++i) { - alpha0.at(i).prepare(images); - alpha1.at(i).prepare(images); - } - beta0.prepare(images); - beta1.prepare(images); - for (const auto& pass : this->passes) { - for (size_t i = 0; i < 7; ++i) { - pass.gamma0.at(i).prepare(images); - pass.gamma1.at(i).prepare(images); - - if (i < 4) continue; - pass.delta0.at(i - 4).prepare(images); - pass.delta1.at(i - 4).prepare(images); - } - } - - std::vector barriers{}; - barriers.reserve(images.size()); - - for (const auto& image : images) { - barriers.emplace_back(vk::Barrier { - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, - .subresourceRange = { - .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .levelCount = 1, - .layerCount = 1 - } - }); - } - - const vk::CommandBuffer cmdbuf{ctx.vk}; - cmdbuf.begin(ctx.vk); - cmdbuf.insertBarriers(ctx.vk, barriers); - cmdbuf.end(ctx.vk); - cmdbuf.submit(ctx.vk); // wait for completion } -void Instance::scheduleFrames(Context& context) { // NOLINT (static) -#ifdef LSFGVK_TESTING_RENDERDOC - const auto& impl = this->m_impl; - if (impl->getRenderDocAPI()) { - impl->getRenderDocAPI()->StartFrameCapture( - RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(impl->getVulkan().inst()), - nullptr); - } -#endif +void Context::idle() const { + const auto& ctx{*this->m_priv}; + const auto& vk{ctx.instance.get().vk}; + + vk.device->waitIdle(*vk.dld); +} + +Context::~Context() { try { - context.scheduleFrames(); - } catch (const std::exception& e) { - throw backend::error("Unable to schedule frames", e); + // NOTE: This will freeze if the user didn't signal the sync semaphore high enough to + // allow the pipeline to 
complete. + this->idle(); + } catch (...) { // NOLINT (empty catch) + // Not much we can do here.. } -#ifdef LSFGVK_TESTING_RENDERDOC - if (impl->getRenderDocAPI()) { - impl->getVulkan().df().DeviceWaitIdle(impl->getVulkan().dev()); - impl->getRenderDocAPI()->EndFrameCapture( - RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(impl->getVulkan().inst()), - nullptr); - } -#endif } -void Context::scheduleFrames() { - // wait for previous pre-pass to complete - if (this->fidx && !this->cmdbufFence.wait(this->ctx.vk)) - throw backend::error("Timeout waiting for previous frame to complete"); - this->cmdbufFence.reset(this->ctx.vk); - - // schedule pre-pass - const auto& cmdbuf = this->cmdbufs.at(0); - cmdbuf.begin(ctx.vk); - - this->mipmaps.render(ctx.vk, cmdbuf, this->fidx); - for (size_t i = 0; i < 7; ++i) { - this->alpha0.at(6 - i).render(ctx.vk, cmdbuf); - this->alpha1.at(6 - i).render(ctx.vk, cmdbuf, this->fidx); - } - this->beta0.render(ctx.vk, cmdbuf, this->fidx); - this->beta1.render(ctx.vk, cmdbuf); - - cmdbuf.end(ctx.vk); - cmdbuf.submit(this->ctx.vk, - {}, this->syncSemaphore.handle(), this->idx, - {}, this->prepassSemaphore.handle(), this->idx - ); - - this->idx++; - - // schedule main passes - for (size_t i = 0; i < this->destImages.size(); i++) { - const auto& cmdbuf = this->cmdbufs.at(i + 1); - cmdbuf.begin(ctx.vk); - - const auto& pass = this->passes.at(i); - for (size_t j = 0; j < 7; j++) { - pass.gamma0.at(j).render(ctx.vk, cmdbuf, this->fidx); - pass.gamma1.at(j).render(ctx.vk, cmdbuf); - - if (j < 4) continue; - pass.delta0.at(j - 4).render(ctx.vk, cmdbuf, this->fidx); - pass.delta1.at(j - 4).render(ctx.vk, cmdbuf); - } - pass.generate->render(ctx.vk, cmdbuf, this->fidx); - - cmdbuf.end(ctx.vk); - cmdbuf.submit(this->ctx.vk, - {}, this->prepassSemaphore.handle(), this->idx - 1, - {}, this->syncSemaphore.handle(), this->idx + i, - i == this->destImages.size() - 1 ? 
this->cmdbufFence.handle() : VK_NULL_HANDLE - ); - } - - this->idx += this->destImages.size(); - this->fidx++; +VkInstance Instance::_instance() const { + return this->m_priv->vk.instance.get(); } -void Instance::closeContext(const Context& context) { - auto it = std::ranges::find_if(this->m_contexts, - [context = &context](const std::unique_ptr& ctx) { - return ctx.get() == context; - }); - if (it == this->m_contexts.end()) - throw backend::error("attempted to close unknown context", - std::runtime_error("no such context")); - - const auto& vk = this->m_impl->getVulkan(); - vk.df().DeviceWaitIdle(vk.dev()); - - this->m_contexts.erase(it); +VkDevice Instance::_device() const { + return *this->m_priv->vk.device; } Instance::~Instance() = default; - -// leaking shenanigans - -namespace { - bool leaking{false}; // NOLINT (global variable) -} - -InstanceImpl::~InstanceImpl() { - if (!leaking) return; - - try { - new vk::Vulkan(std::move(this->vk)); - } catch (...) { - std::cerr << "lsfg-vk: failed to leak Vulkan instance\n"; - } - -} - -void backend::makeLeaking() { - leaking = true; -} diff --git a/lsfg-vk-backend/src/lsfgvk.hpp b/lsfg-vk-backend/src/lsfgvk.hpp new file mode 100644 index 0000000..7d6ba44 --- /dev/null +++ b/lsfg-vk-backend/src/lsfgvk.hpp @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ + +#pragma once + +#define LSFGVK_PRIV +#include "lsfg-vk/lsfgvk.hpp" // IWYU pragma: export + +#include "modules/pipeline.hpp" +#include "modules/library.hpp" +#include "utility/vkhelper.hpp" + +#include +#include +#include + +namespace lsfgvk::priv { + + + /// Internal state of lsfg-vk + struct Instance { + /// Vulkan context + struct Vulkan { + /// Vulkan dispatch loader + std::unique_ptr dld; + /// Vulkan instance (1.2) + vk::UniqueInstance instance; + /// Vulkan physical device + vk::PhysicalDevice physdev; + /// Vulkan device with synchronization2 (extension), external memory & semaphore + /// fd (extension) and timeline semaphores (core) enabled 
+ vk::UniqueDevice device; + /// Compute queue + vk::Queue queue; + /// Compute queue family index + uint32_t qfi; + /// Whether fp16 is enabled and supported (shaderFloat16 is enabled) + bool fp16; + } vk; + /// Shader library + library::ShaderLibrary shaderLibrary; + }; + + /// Internal context for frame generation + struct Context { + /// Parent instance + std::reference_wrapper instance; + /// Pipeline instance + pipeline::Pipeline pipeline; + /// Shared synchronization semaphores + std::pair syncSemaphore; + /// Internal synchronization semaphores + std::pair internalSemaphores; + /// Frames-in-flight fence + vk::UniqueFence fence; + /// Is first iteration + bool firstIteration{true}; + }; + +} diff --git a/lsfg-vk-backend/src/modules/library.cpp b/lsfg-vk-backend/src/modules/library.cpp new file mode 100644 index 0000000..31f52bb --- /dev/null +++ b/lsfg-vk-backend/src/modules/library.cpp @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ + +#include "library.hpp" +#include "library/dll.hpp" +#include "utility/vkhelper.hpp" + +#include +#include +#include +#include +#include +#include +#include + +/// All base shaders in the library. +const std::array, 3> BASE_LIBRARY{{ + { "mipmaps", 0 }, + { "generate_8bit", 1 }, + { "generate_16bit", 2 }, +}}; + +/// All non-base shaders in the library. 
+const std::array, 24> LIBRARY{{ + { "alpha0", 13 }, + { "alpha1", 14 }, + { "alpha2", 15 }, + { "alpha3", 16 }, + { "beta0", 22 }, + { "beta1", 23 }, + { "beta2", 24 }, + { "beta3", 25 }, + { "beta4", 26 }, + { "gamma0", 3 }, + { "gamma1", 4 }, + { "gamma2", 5 }, + { "gamma3", 6 }, + { "gamma4", 7 }, + { "delta0", 8 }, + { "delta1", 9 }, + { "delta2", 10 }, + { "delta3", 11 }, + { "delta4", 12 }, + { "epsilon0", 17 }, + { "epsilon1", 18 }, + { "epsilon2", 19 }, + { "epsilon3", 20 }, + { "epsilon4", 21 } +}}; + +using namespace lsfgvk::library; + +ShaderLibrary::ShaderLibrary( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + bool halfPrecision, + const std::filesystem::path& dll +) { + if (!std::filesystem::exists(dll)) { + throw std::runtime_error("The specified shader DLL does not exist"); + } + // Create shader modules for each shader in the library + const auto resources = priv::parseDll(dll); + for (const auto& [name, idx] : BASE_LIBRARY) { + const uint32_t rid{idx}; + + const auto& it = resources.find(rid == 0 ? 2147488584U : rid); + if (it == resources.end()) + throw std::runtime_error( + "Unable to find base shader '" + std::string(name) + "' in DLL" + ); + + this->m_baseShaders[name] = vkhelper::createShaderModule(dld, device, it->second); + } + + for (const auto& [name, idx] : LIBRARY) { + const std::pair rid{ + idx + (halfPrecision ? 48 : 0), + idx + (halfPrecision ? 
48 : 0) + 24 + }; + + const auto& qit{resources.find(rid.first)}; + const auto& pit{resources.find(rid.second)}; + if (qit == resources.end() || pit == resources.end()) + throw std::runtime_error( + "Unable to find shader '" + std::string(name) + "' in DLL" + ); + + this->m_qualityShaders[name] = vkhelper::createShaderModule(dld, device, qit->second); + this->m_performanceShaders[name] = vkhelper::createShaderModule(dld, device, pit->second); + } +} diff --git a/lsfg-vk-backend/src/modules/library.hpp b/lsfg-vk-backend/src/modules/library.hpp new file mode 100644 index 0000000..4accd1a --- /dev/null +++ b/lsfg-vk-backend/src/modules/library.hpp @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ + +#pragma once + +#include "utility/vkhelper.hpp" + +#include +#include +#include + +namespace lsfgvk::library { + + /// + /// The lsfg-vk shader library + /// + class ShaderLibrary { + public: + /// + /// Create the shader library + /// + /// @param dld Vulkan dynamic dispatch loader + /// @param device Vulkan device + /// @param halfPrecision Whether to load the half-precision shader variants + /// @param dll Path to the shader DLL file + /// @throws std::runtime_error on failure + /// + explicit ShaderLibrary( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + bool halfPrecision, + const std::filesystem::path& dll + ); + + /// + /// Get a base shader by name + /// + /// @param name Shader name + /// @return A reference to the shader + /// @throws std::out_of_range if the shader is not found + /// + [[nodiscard]] const auto& baseShader(std::string_view name) const { + return this->m_baseShaders.at(name); + } + + /// + /// Get a shader by name + /// + /// @param name Shader name + /// @param perf Whether to get the performance variant of the shader + /// @return A reference to the shader + /// @throws std::out_of_range if the shader is not found + /// + [[nodiscard]] const auto& shader(std::string_view name, bool perf) const { + 
auto it{this->m_baseShaders.find(name)}; + if (it != this->m_baseShaders.end()) + return it->second; + + return perf ? this->m_performanceShaders.at(name) : this->m_qualityShaders.at(name); + } + + private: + std::unordered_map m_baseShaders; + std::unordered_map m_qualityShaders; + std::unordered_map m_performanceShaders; + }; + +} diff --git a/lsfg-vk-backend/src/extraction/dll_reader.cpp b/lsfg-vk-backend/src/modules/library/dll.cpp similarity index 53% rename from lsfg-vk-backend/src/extraction/dll_reader.cpp rename to lsfg-vk-backend/src/modules/library/dll.cpp index 13fcd07..9883b82 100644 --- a/lsfg-vk-backend/src/extraction/dll_reader.cpp +++ b/lsfg-vk-backend/src/modules/library/dll.cpp @@ -1,31 +1,27 @@ /* SPDX-License-Identifier: GPL-3.0-or-later */ -#include "dll_reader.hpp" -#include "lsfg-vk-common/helpers/errors.hpp" +#include "dll.hpp" -#include -#include -#include -#include -#include -#include +#include #include #include +#include #include +#include +#include +#include +#include +#include +#include #include #include -#include -#include - -using namespace lsfgvk; -using namespace lsfgvk::backend; namespace { /// DOS file header struct DOSHeader { uint16_t magic; // 0x5A4D std::array pad; - int32_t pe_offset; // file offset + int32_t pe_offset; // File offset }; /// PE header @@ -42,15 +38,15 @@ namespace { struct PEOptionalHeader { uint16_t magic; // 0x20B std::array pad4; - std::pair resource_table; // file offset/size + std::pair resource_table; // File offset/size }; /// Section header struct SectionHeader { std::array pad1; - uint32_t vsize; // virtual + uint32_t vsize; // Virtual uint32_t vaddress; - uint32_t fsize; // raw + uint32_t fsize; // Raw uint32_t foffset; std::array pad2; }; @@ -65,7 +61,7 @@ namespace { /// Resource directory entry struct ResourceDirectoryEntry { uint32_t id; - uint32_t offset; // high bit = directory + uint32_t offset; // High bit = Directory }; /// Resource data entry @@ -74,68 +70,68 @@ namespace { uint32_t size; 
std::array pad; }; -} #pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wunknown-warning-option" #pragma clang diagnostic ignored "-Wunsafe-buffer-usage-in-container" -namespace { /// Safely cast a vector to a pointer of type T template const T* safe_cast(const std::vector& data, size_t offset) { - const size_t end = offset + sizeof(T); + const size_t end{offset + sizeof(T)}; if (end > data.size() || end < offset) - throw ls::error("buffer overflow/underflow during safe cast"); - return reinterpret_cast(&data.at(offset)); + throw std::runtime_error("Buffer overflow/underflow during safe cast"); + return reinterpret_cast(&data.at(offset)); // NOLINT (unsafe cast) } /// Safely cast a vector to a span of T template std::span span_cast(const std::vector& data, size_t offset, size_t count) { - const size_t end = offset + (count * sizeof(T)); + const size_t end{offset + (count * sizeof(T))}; if (end > data.size() || end < offset) - throw ls::error("buffer overflow/underflow during safe cast"); - return std::span(reinterpret_cast(&data.at(offset)), count); + throw std::runtime_error("Buffer overflow/underflow during safe cast"); + return{ reinterpret_cast(&data.at(offset)), count }; // NOLINT (unsafe cast) } -} #pragma clang diagnostic pop +} -std::unordered_map> backend::extractResourcesFromDLL( - const std::filesystem::path& dll) { +using namespace lsfgvk::library; + +std::unordered_map> priv::parseDll( + const std::filesystem::path& dll +) { std::ifstream file(dll, std::ios::binary | std::ios::ate); if (!file.is_open()) - throw ls::error("failed to open dll file"); + throw std::runtime_error("Unable to open file"); - const std::streamsize size = static_cast(file.tellg()); + const std::streamsize size{static_cast(file.tellg())}; file.seekg(0, std::ios::beg); std::vector data(static_cast(size)); - if (!file.read(reinterpret_cast(data.data()), size)) - throw ls::error("failed to read dll file"); + if (!file.read(reinterpret_cast(data.data()), size)) // 
NOLINT (unsafe cast) + throw std::runtime_error("Unable to read file"); - // parse dos header - size_t fileOffset = 0; - const auto* dosHdr = safe_cast(data, 0); + // Parse dos header + size_t fileOffset{0}; + const auto* dosHdr{safe_cast(data, 0)}; if (dosHdr->magic != 0x5A4D) - throw ls::error("dos header magic number is incorrect"); + throw std::runtime_error("Magic number in DOS header is incorrect"); - // parse pe header + // Parse pe header fileOffset += static_cast(dosHdr->pe_offset); - const auto* peHdr = safe_cast(data, fileOffset); + const auto* peHdr{safe_cast(data, fileOffset)}; if (peHdr->signature != 0x00004550) - throw ls::error("pe header signature is incorrect"); + throw std::runtime_error("Signature in PE header is incorrect"); - // parse optional pe header + // Parse optional pe header fileOffset += sizeof(PEHeader); - const auto* peOptHdr = safe_cast(data, fileOffset); + const auto* peOptHdr{safe_cast(data, fileOffset)}; if (peOptHdr->magic != 0x20B) - throw ls::error("pe format is not PE32+"); + throw std::runtime_error("PE format is not PE32+"); const auto& [rsrc_rva, rsrc_size] = peOptHdr->resource_table; - // locate section containing resources + // }Locate section containing resources std::optional rsrc_offset; fileOffset += peHdr->opt_hdr_size; - const auto sectHdrs = span_cast(data, fileOffset, peHdr->sect_count); + const auto sectHdrs{span_cast(data, fileOffset, peHdr->sect_count)}; for (const auto& sectHdr : sectHdrs) { if (rsrc_rva < sectHdr.vaddress || rsrc_rva > (sectHdr.vaddress + sectHdr.vsize)) continue; @@ -144,69 +140,71 @@ std::unordered_map> backend::extractResourcesFrom break; } if (!rsrc_offset) - throw ls::error("unable to locate resource section"); + throw std::runtime_error("Unable to locate resource section"); - // parse resource directory + // Parse resource directory fileOffset = rsrc_offset.value(); - const auto* rsrcDir = safe_cast(data, fileOffset); + const auto* rsrcDir{safe_cast(data, fileOffset)}; if 
(rsrcDir->id_count < 3) - throw ls::error("resource directory does not have enough entries"); + throw std::runtime_error("Resource directory does not have enough entries"); - // find resource table with data type + // Find resource table with data type std::optional rsrc_tbl_offset; fileOffset = rsrc_offset.value() + sizeof(ResourceDirectory); - const auto rsrcDirEntries = span_cast( - data, fileOffset, rsrcDir->name_count + rsrcDir->id_count); + const auto rsrcDirEntries{span_cast( + data, fileOffset, rsrcDir->name_count + rsrcDir->id_count)}; for (const auto& rsrcDirEntry : rsrcDirEntries) { if (rsrcDirEntry.id != 10) // RT_RCDATA continue; if ((rsrcDirEntry.offset & 0x80000000) == 0) - throw ls::error("expected resource directory, found data entry"); + throw std::runtime_error("Expected resource directory, found data entry"); rsrc_tbl_offset.emplace(rsrcDirEntry.offset & 0x7FFFFFFF); } if (!rsrc_tbl_offset) - throw ls::error("unabele to locate RT_RCDATA directory"); + throw std::runtime_error("Unable to locate RT_RCDATA directory"); - // parse data type resource directory + // Parse data type resource directory fileOffset = rsrc_offset.value() + rsrc_tbl_offset.value(); - const auto* rsrcTbl = safe_cast(data, fileOffset); + const auto* rsrcTbl{safe_cast(data, fileOffset)}; if (rsrcTbl->id_count < 1) - throw ls::error("RT_RCDATA directory does not have enough entries"); + throw std::runtime_error("RT_RCDATA directory does not have enough entries"); - // collect all resources + // Collect all resources fileOffset += sizeof(ResourceDirectory); - const auto rsrcTblEntries = span_cast( - data, fileOffset, rsrcTbl->name_count + rsrcTbl->id_count); - std::unordered_map> resources; + const auto rsrcTblEntries{span_cast( + data, fileOffset, rsrcTbl->name_count + rsrcTbl->id_count)}; + + std::unordered_map> resources; + resources.reserve(rsrcTbl->id_count); + for (const auto& rsrcTblEntry : rsrcTblEntries) { if ((rsrcTblEntry.offset & 0x80000000) == 0) - throw 
ls::error("expected resource directory, found data entry"); + throw std::runtime_error("Expected resource directory, found data entry"); - // skip over language directory + // Skip over language directory fileOffset = rsrc_offset.value() + (rsrcTblEntry.offset & 0x7FFFFFFF); const auto* langDir = safe_cast(data, fileOffset); if (langDir->id_count < 1) - throw ls::error("Incorrect language directory"); + throw std::runtime_error("Malformed language directory"); fileOffset += sizeof(ResourceDirectory); - const auto* langDirEntry = safe_cast(data, fileOffset); + const auto* langDirEntry{safe_cast(data, fileOffset)}; if ((langDirEntry->offset & 0x80000000) != 0) - throw ls::error("expected resource data entry, but found directory"); + throw std::runtime_error("Expected resource data entry, found directory"); - // parse resource data entry + // Parse resource data entry fileOffset = rsrc_offset.value() + (langDirEntry->offset & 0x7FFFFFFF); - const auto* entry = safe_cast(data, fileOffset); + const auto* entry{safe_cast(data, fileOffset)}; if (entry->offset < rsrc_rva || entry->offset > (rsrc_rva + rsrc_size)) - throw ls::error("resource data entry points outside resource section"); + throw std::runtime_error("Resource data entry points outside resource section"); - // extract resource - std::vector resource(entry->size); + // Extract resource fileOffset = (entry->offset - rsrc_rva) + rsrc_offset.value(); - if (fileOffset + entry->size > data.size()) - throw ls::error("resource data entry points outside file"); - std::copy_n(&data.at(fileOffset), entry->size, resource.data()); - resources.emplace(rsrcTblEntry.id, std::move(resource)); + const auto rdata{span_cast( + data, fileOffset, entry->size / sizeof(uint32_t))}; + + resources.emplace(rsrcTblEntry.id, std::vector(rdata.begin(), rdata.end())); } return resources; diff --git a/lsfg-vk-backend/src/modules/library/dll.hpp b/lsfg-vk-backend/src/modules/library/dll.hpp new file mode 100644 index 0000000..3a43642 --- 
/dev/null +++ b/lsfg-vk-backend/src/modules/library/dll.hpp @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ + +#pragma once + +#include +#include +#include +#include + +namespace lsfgvk::library::priv { + + /// + /// Parse all resources from a DLL file + /// + /// @param dll File path + /// @returns Map of resource ID to data + /// @throws std::runtime_error if the file is invalid or cannot be read + /// + std::unordered_map> parseDll( + const std::filesystem::path& dll + ); + +} diff --git a/lsfg-vk-backend/src/modules/pipeline.cpp b/lsfg-vk-backend/src/modules/pipeline.cpp new file mode 100644 index 0000000..c5f7d50 --- /dev/null +++ b/lsfg-vk-backend/src/modules/pipeline.cpp @@ -0,0 +1,838 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ + +#include "pipeline.hpp" +#include "library.hpp" +#include "modules/pipeline/signature.hpp" +#include "modules/pipeline/signature/helpers.hpp" +#include "modules/pipeline/signature/image.hpp" +#include "modules/pipeline/signature/pass.hpp" +#include "utility/vkhelper.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace lsfgvk::pipeline; + +namespace { + /// Helper method to apply extent operations + vk::Extent2D apply( + const vk::Extent2D& base, + const vk::Extent2D& flow, + const ExtentOp& op + ) { + vk::Extent2D result{op.flow() ? 
flow : base}; + for (const auto& [add, shift] : op.operations()) { + result.width = (result.width + add) >> shift; + result.height = (result.height + add) >> shift; + } + return { result.width, result.height }; + } +} + +Pipeline::Pipeline( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::PhysicalDevice& physdev, + const vk::Queue& queue, + uint32_t queueFamilyIndex, + const library::ShaderLibrary& library, + const PipelineSignature& signature, + vk::Extent2D extent, + float flow, + bool perf, + bool hdr +) { + // Build the Vulkan descriptor set layout + uint32_t sampledImageCount{}; + uint32_t storageImageCount{}; + + std::vector bindings; + bindings.reserve(4 + signature.descriptors.size()); + + bindings.push_back({ + .binding = 0, + .descriptorType = vk::DescriptorType::eUniformBuffer, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eCompute + }); + + for (uint32_t i = 1; i <= 3; i++) { + bindings.push_back({ + .binding = i, + .descriptorType = vk::DescriptorType::eSampler, + .descriptorCount = 1, + .stageFlags = vk::ShaderStageFlagBits::eCompute + }); + } + + uint32_t bindingIdx{4}; + for (const auto& binding : signature.descriptors) { + uint32_t descriptorCount{static_cast(binding.resources.size())}; + if (descriptorCount == 1) { + const auto& image{signature.images.at(binding.resources.front())}; + if (image.flags & ImageFlag::Mipmaps) + descriptorCount = image.count; + } + + bindings.push_back({ + .binding = bindingIdx++, + .descriptorType = binding.type == BindingType::StorageImage ? 
+ vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage, + .descriptorCount = descriptorCount, + .stageFlags = vk::ShaderStageFlagBits::eCompute + }); + + if (binding.type == BindingType::StorageImage) + storageImageCount += descriptorCount; + else + sampledImageCount += descriptorCount; + } + + auto [layout, pipelineLayout] = vkhelper::createLayout( + dld, + device, + bindings, + sizeof(PushConstants) + ); + this->m_layout = { + .layout = std::move(layout), + .pipelineLayout = std::move(pipelineLayout) + }; + + // Create the Vulkan images + vk::DeviceSize alignment{}; + uint32_t types{~0U}; + + const vk::Extent2D flowExtent{ + static_cast(static_cast(extent.width) * flow), + static_cast(static_cast(extent.height) * flow) + }; + for (const auto& imageSignature : signature.images) { + const auto imageIdx{this->m_images.size()}; + auto& image{this->m_images.emplace_back()}; + image = { + .signature = imageSignature + }; + + const bool hasHdrVariant{image.signature.flags & ImageFlag::HdrVariant}; + const vk::Format format{ + (hasHdrVariant && hdr) ? + static_cast(image.signature.hdrFormat) : + static_cast(image.signature.format) + }; + const vk::Extent2D baseExtent{apply(extent, flowExtent, image.signature.extentOp)}; + const vk::ImageUsageFlags usage{ + vk::ImageUsageFlagBits::eStorage | vk::ImageUsageFlagBits::eSampled + }; + + const bool isMipmapped{image.signature.flags & ImageFlag::Mipmaps}; + for (uint32_t i = 0; i < image.signature.count; i++) { + const vk::Extent2D imageExtent{ + .width = std::max(baseExtent.width >> i, 1U), + .height = std::max(baseExtent.height >> i, 1U) + }; + + if (image.signature.flags & (ImageFlag::ExternalInput | ImageFlag::ExternalOutput)) { + const bool isInputOr{image.signature.flags & ImageFlag::ExternalInput}; + + auto [subimage, allocation] = vkhelper::createExternalImage( + dld, + device, + physdev, + imageExtent, + format, + image.signature.count, + usage | + (isInputOr ? 
+ vk::ImageUsageFlagBits::eTransferDst + : vk::ImageUsageFlagBits::eTransferSrc) + ); + + if (isInputOr) { + this->m_externalInputs.push_back({ + .extent = imageExtent, + .format = format, + .layers = image.signature.count, + .image = *subimage, + .memory = *allocation + }); + } else { + this->m_externalOutputs.push_back({ + .extent = imageExtent, + .format = format, + .layers = image.signature.count, + .image = *subimage, + .memory = *allocation + }); + } + + image.subimages.push_back({ + .image = std::move(subimage) + }); + this->m_externalAllocations[imageIdx] = std::move(allocation); + + break; // There can only be one image + } + + image.subimages.push_back({ + .image = vkhelper::createImage( + dld, + device, + imageExtent, + format, + isMipmapped ? 1 : image.signature.count, + usage + ) + }); + + if (!isMipmapped) { + break; + } + } + + for (auto& subimage : image.subimages) { + subimage.memory = device.getImageMemoryRequirements(*subimage.image, dld); + + if (image.signature.flags & (ImageFlag::ExternalInput | ImageFlag::ExternalOutput)) + break; + + alignment = std::max(alignment, subimage.memory.alignment); + types &= subimage.memory.memoryTypeBits; + } + } + + if (types == 0) + throw std::runtime_error("No compatible memory type found for pipeline images"); + + // Fill in image sizes in respect to alignment + for (auto& image : this->m_images) { + if (image.signature.flags & (ImageFlag::ExternalInput | ImageFlag::ExternalOutput)) + continue; // External inputs have dedicated allocations + + for (const auto& subimage : image.subimages) { + image.size += vkhelper::align(subimage.memory.size, alignment); + } + } + + // Calculate optimal-ish allocations using heuristics & greedy fit strategy + std::vector images(signature.images.size()); + std::iota(images.begin(), images.end(), 0); + + std::ranges::sort(images, [&](const auto& a, const auto& b) { + return this->m_images.at(a).size > this->m_images.at(b).size; + }); + + std::vector placements; + for (const 
auto& imageIdx : images) { + const auto& image{this->m_images.at(imageIdx)}; + if (image.signature.flags & (ImageFlag::ExternalInput | ImageFlag::ExternalOutput)) + continue; + + auto& allocation{ + (image.signature.flags & ImageFlag::Pinned) + ? this->m_allocations.at(1) + : this->m_allocations.at(0) + }; + auto& segment{allocation.segments.emplace_back()}; + + vk::DeviceSize size{}; + for (const auto& subimage : image.subimages) { + const vk::DeviceSize alignedSize{vkhelper::align(subimage.memory.size, alignment)}; + segment.subsegments.push_back({ + .size = alignedSize, + .offset = size + }); + + size += alignedSize; + } + + if (image.signature.flags & ImageFlag::Pinned) { + segment = { + .imageIdx = imageIdx, + .subsegments = segment.subsegments, + .size = size, + .offset = allocation.size, + }; + allocation.size += size; + } else { + const auto lifetime{image.signature.lifetime}; + + vk::DeviceSize offset{}; + for (const auto& otherSegmentIdx : placements) { + const auto& otherSegment{allocation.segments.at(otherSegmentIdx)}; + if (otherSegment.imageIdx == imageIdx) + continue; // Skip self + + const auto& otherImage{this->m_images.at(otherSegment.imageIdx)}; + const auto& otherLifetime{otherImage.signature.lifetime}; + + if (lifetime.first > otherLifetime.second || + lifetime.second < otherLifetime.first) + continue; // Skip horizontally non-overlapping + + if (offset >= (otherSegment.offset + otherSegment.size) || + otherSegment.offset >= (offset + size)) + continue; // Skip vertically non-overlapping + + offset = otherSegment.offset + otherSegment.size; + } + + allocation.size = std::max(allocation.size, offset + size); + segment = { + .imageIdx = imageIdx, + .subsegments = segment.subsegments, + .size = size, + .offset = offset, + }; + + const size_t i{allocation.segments.size() - 1}; + auto it{std::ranges::upper_bound(placements, i, + [&](const auto& a, const auto& b) { + return allocation.segments.at(a).offset < allocation.segments.at(b).offset; + } + 
)}; + placements.insert(it, i); + } + } + + // Allocate the memory & bind the images + for (auto& allocation : this->m_allocations) { + allocation.memory = vkhelper::allocateMemory( + dld, + device, + physdev, + allocation.size, + types + ); + + for (const auto& segment : allocation.segments) { + const auto& image{this->m_images.at(segment.imageIdx)}; + + for (size_t i = 0; i < image.subimages.size(); i++) { + const auto& subsegment{segment.subsegments.at(i)}; + const auto& subimage{image.subimages.at(i)}; + + device.bindImageMemory( + *subimage.image, + *allocation.memory, + segment.offset + subsegment.offset, + dld + ); + } + } + } + + // Create image views + for (auto& image : this->m_images) { + const bool hasHdrVariant{image.signature.flags & ImageFlag::HdrVariant}; + const bool isLayered{image.subimages.size() == 1 && image.signature.count > 1}; + + for (auto& subimage : image.subimages) { + subimage.view = vkhelper::createImageView( + dld, + device, + *subimage.image, + static_cast((hasHdrVariant && hdr) + ? image.signature.hdrFormat : image.signature.format), + isLayered ? image.signature.count : 1 + ); + } + } + + // Create the descriptor set & required resources + auto [pool, set] = vkhelper::createDescriptorSet( + dld, + device, + *this->m_layout.layout, + 3, 1, sampledImageCount, storageImageCount + ); + this->m_descriptorSet.pool = std::move(pool); + this->m_descriptorSet.set = set; + + const UniformBuffer buf{ + .advancedColorKind = hdr ? 2U : 0U, + .hdrSupport = hdr ? 
1U : 0U, + .resolutionInvScale = 1.0F / flow, + .uiThreshold = 0.5F + }; + this->m_descriptorSet.buffer = vkhelper::createBuffer( + dld, + device, + physdev, + buf + ); + auto* mapped{static_cast( + device.mapMemory( + *this->m_descriptorSet.buffer.second, + 0, + VK_WHOLE_SIZE, + {}, + dld + ) + )}; + this->m_descriptorSet.mappedBuffer = std::shared_ptr( + new UniformBuffer*{mapped}, + [device, memory = *this->m_descriptorSet.buffer.second, dld](auto* ptr) { + device.unmapMemory(memory, dld); + delete ptr; // NOLINT (manual memory management) + } + ); + this->m_descriptorSet.samplers.at(0) = vkhelper::createSampler( + dld, + device, + vk::SamplerAddressMode::eClampToBorder, + vk::CompareOp::eNever, + false + ); + this->m_descriptorSet.samplers.at(1) = vkhelper::createSampler( + dld, + device, + vk::SamplerAddressMode::eClampToBorder, + vk::CompareOp::eNever, + true + ); + this->m_descriptorSet.samplers.at(2) = vkhelper::createSampler( + dld, + device, + vk::SamplerAddressMode::eClampToEdge, + vk::CompareOp::eAlways, + false + ); + + // Update descriptor set bindings + std::vector writeInfos(4 + signature.descriptors.size()); + bindingIdx = 0; + + std::array bufferInfos; + bufferInfos.at(0) = { + .buffer = *this->m_descriptorSet.buffer.first, + .range = VK_WHOLE_SIZE + }; + writeInfos.at(0) = { + .dstSet = this->m_descriptorSet.set, + .dstBinding = bindingIdx++, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eUniformBuffer, + .pBufferInfo = bufferInfos.data() + }; + + std::array samplerInfos; + for (uint32_t i = 0; i < 3; i++) { + auto& writeInfo{writeInfos.at(bindingIdx)}; + + samplerInfos.at(i) = { + .sampler = *this->m_descriptorSet.samplers.at(i) + }; + writeInfo = { + .dstSet = this->m_descriptorSet.set, + .dstBinding = bindingIdx++, + .descriptorCount = 1, + .descriptorType = vk::DescriptorType::eSampler, + .pImageInfo = &samplerInfos.at(i) + }; + } + + std::vector> imageInfos2D(signature.descriptors.size()); + for (const auto& binding : 
signature.descriptors) { + auto& writeInfo{writeInfos.at(bindingIdx)}; + + auto& imageInfos{imageInfos2D.at(bindingIdx - 4)}; + imageInfos.reserve(binding.resources.size()); + + for (const auto& resourceIdx : binding.resources) { + const auto& image{this->m_images.at(resourceIdx)}; + + for (const auto& subimage : image.subimages) { + imageInfos.push_back({ + .imageView = *subimage.view, + .imageLayout = vk::ImageLayout::eGeneral + }); + } + } + + writeInfo = { + .dstSet = this->m_descriptorSet.set, + .dstBinding = bindingIdx++, + .descriptorCount = static_cast(imageInfos.size()), + .descriptorType = binding.type == BindingType::StorageImage ? + vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage, + .pImageInfo = imageInfos.data() + }; + } + + device.updateDescriptorSets(writeInfos, {}, dld); + + // Build all shader pipelines + std::vector pipelineCreateInfos; + for (const auto& [name, variant] : signature.shaders) { + std::string name2{name}; + if (variant) name2 += hdr ? "_16bit" : "_8bit"; + + const auto& module{library.shader(name2, perf)}; + + pipelineCreateInfos.push_back({ + .stage = { + .stage = vk::ShaderStageFlagBits::eCompute, + .module = *module, + .pName = "main" + }, + .layout = *this->m_layout.pipelineLayout + }); + } + + const std::string_view cacheTag{perf ? 
"performance" : "quality"}; + auto [cache, isCacheValid] = vkhelper::createPipelineCache( + dld, + device, + physdev, + cacheTag + ); + this->m_cache = std::move(cache); + + std::vector pipelines{ + device.createComputePipelinesUnique( + *this->m_cache, + pipelineCreateInfos, + nullptr, + dld + ).value + }; + + if (!isCacheValid) { + vkhelper::persistPipelineCache( + dld, + device, + physdev, + *this->m_cache, + cacheTag + ); + } + + this->m_pipelines.reserve(signature.shaders.size()); + for (size_t i = 0; i < signature.shaders.size(); i++) { + const auto& name{signature.shaders.at(i).first}; + this->m_pipelines.emplace(name, std::move(pipelines.at(i))); + } + + // Build pipeline stages + std::unordered_map indices; + for (const auto& stageSignature : signature.stages) { + auto& stage{this->m_stages.emplace_back()}; + stage.substages.emplace_back(); + + for (const auto& passIdx : stageSignature.passes) { // (Sorted by shader) + const auto& pass{signature.passes.at(passIdx)}; + + for (const auto& resource : pass.inputs) { + if (!resource.idx()) + continue; + stage.sampledImages.push_back(*resource.idx()); + } + for (const auto& resource : pass.outputs) { + if (!resource.idx()) + continue; + stage.storageImages.push_back(*resource.idx()); + } + + auto& lastPipeline{stage.substages.back().pipeline}; + if (!lastPipeline.empty() && lastPipeline != pass.shader) { + stage.substages.emplace_back(); + } + + auto& substage{stage.substages.back()}; + substage.pipeline = pass.shader; + substage.subiterations.push_back({ + .iterationIndex = indices[substage.pipeline]++, + .dispatch = apply(extent, flowExtent, pass.dispatchOp), + .isSpecial = pass.flags & PassFlag::Special + }); + } + } + + // Transition all images into general layout + this->m_pool = vkhelper::createCommandPool( + dld, + device, + queueFamilyIndex + ); + + std::vector barriers; + for (const auto& image : this->m_images) { + for (const auto& subimage : image.subimages) { + barriers.push_back({ + .newLayout = 
vk::ImageLayout::eGeneral, + .image = *subimage.image, + .subresourceRange = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .levelCount = 1, + .layerCount = image.subimages.size() == 1 ? image.signature.count : 1 + } + }); + } + } + + const auto layoutCmdbuf{ + vkhelper::createCommandBuffer(dld, device, *this->m_pool) + }; + + layoutCmdbuf->begin({ .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit }, dld); + layoutCmdbuf->pipelineBarrier2KHR({ + .imageMemoryBarrierCount = static_cast(barriers.size()), + .pImageMemoryBarriers = barriers.data() + }, dld); + layoutCmdbuf->end(dld); + + const auto fence{device.createFenceUnique({}, nullptr, dld)}; + queue.submit( + {{ + .commandBufferCount = 1, + .pCommandBuffers = &*layoutCmdbuf + }}, + *fence, + dld + ); + if (device.waitForFences(*fence, VK_TRUE, 50'000'000, dld) != vk::Result::eSuccess) { + throw std::runtime_error("Failed to wait for image layout transition fence"); + } + + for (size_t i = 0; i < signature.splitIndices.size() + 1; i++) { + auto& cmdbuf{this->m_cmdbufs.emplace_back()}; + cmdbuf = vkhelper::createCommandBuffer(dld, device, *this->m_pool); + cmdbuf->begin({ .flags = vk::CommandBufferUsageFlagBits::eSimultaneousUse }, dld); + + cmdbuf->bindDescriptorSets( + vk::PipelineBindPoint::eCompute, + *this->m_layout.pipelineLayout, + 0, + this->m_descriptorSet.set, + {}, + dld + ); + } + + size_t currentStageIndex{0}; + size_t currentStageBound{ + signature.splitIndices.empty() ? signature.passes.size() : signature.splitIndices.front() + }; + + std::vector barrierVector; + barrierVector.reserve(16); + + std::unordered_map stageBarriers; + for (size_t i = 0; i < this->m_stages.size(); i++) { + if (i == currentStageBound) { + currentStageIndex++; + currentStageBound = currentStageIndex < signature.splitIndices.size() ? 
+ signature.splitIndices.at(currentStageIndex) : signature.passes.size(); + } + + const auto& stage{this->m_stages.at(i)}; + const auto& cmdbuf{this->m_cmdbufs.at(currentStageIndex)}; + + // Append barriers for this stage + for (const auto& sampledImage : stage.sampledImages) { + const auto& image = this->m_images.at(sampledImage); + for (const auto& subimage : image.subimages) { + auto imageHandle{static_cast(*subimage.image)}; // NOLINT (32-bit) + if (stageBarriers.contains(imageHandle)) { + stageBarriers[imageHandle].dstAccessMask = vk::AccessFlagBits2::eShaderRead; + continue; + } + + stageBarriers[imageHandle] = { + .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader, + .srcAccessMask = vk::AccessFlagBits2::eNone, + .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader, + .dstAccessMask = vk::AccessFlagBits2::eShaderRead, + .image = *subimage.image, + .subresourceRange = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .levelCount = 1, + .layerCount = image.subimages.size() == 1 ? image.signature.count : 1 + } + }; + } + } + for (const auto& storageImage : stage.storageImages) { + const auto& image = this->m_images.at(storageImage); + for (const auto& subimage : image.subimages) { + auto imageHandle{static_cast(*subimage.image)}; // NOLINT (32-bit) + if (stageBarriers.contains(imageHandle)) { + stageBarriers[imageHandle].dstAccessMask = vk::AccessFlagBits2::eShaderWrite; + continue; + } + + stageBarriers[imageHandle] = { + .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader, + .srcAccessMask = vk::AccessFlagBits2::eNone, + .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader, + .dstAccessMask = vk::AccessFlagBits2::eShaderWrite, + .image = *subimage.image, + .subresourceRange = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .levelCount = 1, + .layerCount = image.subimages.size() == 1 ? 
image.signature.count : 1 + } + }; + } + } + + + barrierVector.clear(); + for (const auto& [_, barrier] : stageBarriers) // NOLINT (nondeterministic order) + barrierVector.push_back(barrier); + stageBarriers.clear(); + cmdbuf->pipelineBarrier2KHR({ + .imageMemoryBarrierCount = static_cast(barrierVector.size()), + .pImageMemoryBarriers = barrierVector.data() + }, dld); + + for (const auto& substage : stage.substages) { + // Bind shader pipeline for this stage + const auto& pipeline = this->m_pipelines.at(substage.pipeline); + cmdbuf->bindPipeline(vk::PipelineBindPoint::eCompute, *pipeline, dld); + + // Dispatch all subiterations for this stage + for (const auto& subiteration : substage.subiterations) { + const PushConstants pushConstants{ + .specialFlag = subiteration.isSpecial ? 1U : 0U, + .subiteration = subiteration.iterationIndex + }; + cmdbuf->pushConstants( + *this->m_layout.pipelineLayout, + vk::ShaderStageFlagBits::eCompute, + 0, + sizeof(PushConstants), + &pushConstants, + dld + ); + + const auto& dispatch{subiteration.dispatch}; + cmdbuf->dispatch(dispatch.width, dispatch.height, 1, dld); + } + } + + // Append barriers for next stage + for (const auto& sampledImage : stage.sampledImages) { + const auto& image = this->m_images.at(sampledImage); + for (const auto& subimage : image.subimages) { + stageBarriers[static_cast(*subimage.image)] = { + .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader, + .srcAccessMask = vk::AccessFlagBits2::eShaderRead, + .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader, + .dstAccessMask = vk::AccessFlagBits2::eShaderRead, + .image = *subimage.image, + .subresourceRange = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .levelCount = 1, + .layerCount = image.subimages.size() == 1 ? 
image.signature.count : 1 + } + }; + } + } + for (const auto& storageImage : stage.storageImages) { + const auto& image = this->m_images.at(storageImage); + for (const auto& subimage : image.subimages) { + stageBarriers[static_cast(*subimage.image)] = { + .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader, + .srcAccessMask = vk::AccessFlagBits2::eShaderWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader, + .dstAccessMask = vk::AccessFlagBits2::eShaderRead, + .image = *subimage.image, + .subresourceRange = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .levelCount = 1, + .layerCount = image.subimages.size() == 1 ? image.signature.count : 1 + } + }; + } + } + + // Skip barriers on switch between passes + if (i + 1 == currentStageBound) { + stageBarriers.clear(); + } + } + + for (auto& cmdbuf : this->m_cmdbufs) { + cmdbuf->end(dld); + } +} + +vk::CommandBuffer Pipeline::buildTransCmdbuf( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + uint32_t iteration, + uint32_t index, + uint32_t total +) { + const bool persist{total > 8}; + const uint64_t key{persist ? ((static_cast(index) << 32) | total) : index}; + + if (persist && this->m_transCmdbufs.contains(key)) + return *this->m_transCmdbufs.at(key); + + auto& cmdbuf{this->m_transCmdbufs[key]}; + cmdbuf = vkhelper::createCommandBuffer( + dld, + device, + *this->m_pool + ); + + cmdbuf->begin({ + .flags = persist ? 
vk::CommandBufferUsageFlagBits::eSimultaneousUse : + vk::CommandBufferUsageFlagBits::eOneTimeSubmit + }, dld); + + vk::BufferMemoryBarrier2KHR barrier{ + .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader, + .srcAccessMask = vk::AccessFlagBits2::eUniformRead, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .buffer = *this->m_descriptorSet.buffer.first, + .size = 4 + }; + cmdbuf->pipelineBarrier2KHR({ + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &barrier + }, dld); + + const UniformBuffer buf{ + .timestamp = static_cast(index + 1) / static_cast(total + 1), + .iteration = iteration + }; + cmdbuf->updateBuffer( + *this->m_descriptorSet.buffer.first, + 0, + 4, + static_cast(&buf.timestamp), + dld + ); + + barrier = { + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader, + .dstAccessMask = vk::AccessFlagBits2::eUniformRead, + .buffer = *this->m_descriptorSet.buffer.first, + .size = 4 + }; + cmdbuf->pipelineBarrier2KHR({ + .bufferMemoryBarrierCount = 1, + .pBufferMemoryBarriers = &barrier + }, dld); + + cmdbuf->end(dld); + + return *cmdbuf; +} diff --git a/lsfg-vk-backend/src/modules/pipeline.hpp b/lsfg-vk-backend/src/modules/pipeline.hpp new file mode 100644 index 0000000..6c81635 --- /dev/null +++ b/lsfg-vk-backend/src/modules/pipeline.hpp @@ -0,0 +1,225 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ + +#pragma once + +#include "library.hpp" +#include "pipeline/signature.hpp" +#include "pipeline/signature/image.hpp" +#include "utility/vkhelper.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace lsfgvk::pipeline { + + /// Handle to an external image + struct ExternalImage { + /// Image Extent + vk::Extent2D extent; + /// Image Format + vk::Format format; + /// Amount of layers in image + uint32_t layers; + + 
/// Handle to the Vulkan image (not owned) + vk::Image image; + /// Handle to the Vulkan memory (not owned) + vk::DeviceMemory memory; + }; + + /// Struct for the uniform buffer + struct UniformBuffer { + float timestamp; + uint32_t iteration; + uint32_t advancedColorKind; + uint32_t hdrSupport; + float resolutionInvScale; + float uiThreshold; + }; + + /// Struct for push constants + struct PushConstants { + uint32_t specialFlag; + uint32_t subiteration; + }; + + /// + /// Vulkan pipeline created from a signature + /// + class Pipeline { + public: + /// + /// Create a new pipeline + /// + /// @param dld Vulkan dispatch loader + /// @param device Vulkan device + /// @param physdev Vulkan physical device + /// @param queue Vulkan compute queue + /// @param queueFamilyIndex Compute queue family index + /// @param library Shader library + /// @param signature Pipeline signature + /// @param extent Base extent + /// @param flow Flow scale + /// @param perf Performance mode + /// @param hdr HDR variant + /// @throws std::runtime_error on failure + /// + explicit Pipeline( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::PhysicalDevice& physdev, + const vk::Queue& queue, + uint32_t queueFamilyIndex, + const library::ShaderLibrary& library, + const PipelineSignature& signature, + vk::Extent2D extent, + float flow, + bool perf, + bool hdr + ); + + /// + /// Get all external input images + /// + /// @return List of images + /// + [[nodiscard]] auto& getExternalInputs() const { + return this->m_externalInputs; + } + + /// Get all external output images + [[nodiscard]] auto& getExternalOutputs() const { + return this->m_externalOutputs; + } + + /// + /// Get the mapped uniform buffer + /// + /// @return Mapped uniform buffer + /// + [[nodiscard]] auto* getMappedBuffer() const { + return *this->m_descriptorSet.mappedBuffer.get(); + } + + /// + /// Get all command buffers + /// + /// @return List of command buffers + /// + [[nodiscard]] 
auto& getCmdbufs() const { + return this->m_cmdbufs; + } + + /// + /// Build a transition command buffer + /// + /// @param dld Vulkan dispatch loader + /// @param device Vulkan device + /// @param iteration Current iteration + /// @param index Index of the iteration + /// @param total Total iterations + /// @return Command buffer handle + /// + vk::CommandBuffer buildTransCmdbuf( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + uint32_t iteration, + uint32_t index, + uint32_t total + ); + + private: + /// Vulkan descriptor set & pipeline layout + struct Layout { + vk::UniqueDescriptorSetLayout layout; + vk::UniquePipelineLayout pipelineLayout; + }; + Layout m_layout; + + /// Sub-image of a Vulkan image + struct SubImage { + vk::UniqueImage image; + vk::MemoryRequirements memory; + vk::UniqueImageView view; + }; + + /// Vulkan image created from an ImageSignature + struct Image { + ImageSignature signature; + std::vector subimages; + vk::DeviceSize size{}; + }; + std::vector m_images; + + std::vector m_externalInputs; + std::vector m_externalOutputs; + + /// Memory allocation sub-segment + struct MemorySubSegment { + vk::DeviceSize size{}; + vk::DeviceSize offset{}; // Offset in memory segment + }; + + /// Memory allocation segment + struct MemorySegment { + size_t imageIdx{}; + std::vector subsegments; + vk::DeviceSize size{}; + vk::DeviceSize offset{}; // Offset in allocation + }; + + /// Memory allocation info + struct AllocationInfo { + vk::UniqueDeviceMemory memory; + std::vector segments; + vk::DeviceSize size{}; + }; + std::array m_allocations; + std::unordered_map m_externalAllocations; + + /// Vulkan descriptor set + struct DescriptorSet { + vk::UniqueDescriptorPool pool; + vk::DescriptorSet set; // Can not be freed + std::pair buffer; + std::shared_ptr mappedBuffer; + std::array samplers; + }; + DescriptorSet m_descriptorSet; + + vk::UniquePipelineCache m_cache; + std::unordered_map m_pipelines; + + /// Single iteration of a 
sub-stage + struct SubIteration { + uint32_t iterationIndex{}; + vk::Extent2D dispatch; + bool isSpecial{}; + }; + + /// Sub-stage of an execution stage + struct SubStage { + std::string_view pipeline; + std::vector subiterations; + }; + + /// Execution stage + struct Stage { + std::vector substages; + std::vector sampledImages; + std::vector storageImages; + }; + std::vector m_stages; + + vk::UniqueCommandPool m_pool; + std::vector m_cmdbufs; + std::unordered_map m_transCmdbufs; + }; + +} diff --git a/lsfg-vk-backend/src/modules/pipeline/signature.hpp b/lsfg-vk-backend/src/modules/pipeline/signature.hpp new file mode 100644 index 0000000..fb0ea7b --- /dev/null +++ b/lsfg-vk-backend/src/modules/pipeline/signature.hpp @@ -0,0 +1,340 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ + +#pragma once + +#include "signature/helpers.hpp" +#include "signature/image.hpp" +#include "signature/pass.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace lsfgvk::pipeline { + + /// Type of a descriptor set binding + enum class BindingType : uint8_t { + SampledImage, + StorageImage + }; + + /// Signature of a descriptor set binding + struct BindingSignature { + /// Type of binding + BindingType type{ BindingType::SampledImage }; + /// Resources attached to binding + inplace_vector resources; + }; + + /// Signature of a pipeline stage + struct StageSignature { + /// Passes executed in this stage + inplace_vector passes; + }; + + /// + /// Signature of a compute pipeline + /// + struct PipelineSignature { + /// Shader names used by the pipeline (and if there are hdr variants) + inplace_vector, 32> shaders; + /// Images used by the pipeline + inplace_vector images; + /// Ordered set of bindings for the descriptor set + inplace_vector descriptors; + /// Indexable list of all passes + inplace_vector passes; + /// Ordered list of stages, executed in sequence + inplace_vector stages; + /// Stage index where the
command buffers are split + inplace_vector splitIndices; + }; + + /// + /// Builder for the signature of a compute pipeline + /// + class PipelineSignatureBuilder { + public: + /// + /// Create a new empty signature builder + /// + explicit PipelineSignatureBuilder() = default; + + /// + /// Register an image + /// + /// @param image Image signature + /// @return Handle to the image + /// + consteval size_t registerImage(ImageSignature image) { + this->m_images.push_back(std::move(image)); + return this->m_images.size() - 1; + } + + /// + /// Append a pass + /// + /// @param pass Pass signature + /// @return Handle to the pass + /// + consteval size_t appendPass(PassSignature pass) { + this->m_passes.push_back(std::move(pass)); + return this->m_passes.size() - 1; + } + + /// + /// Split the command buffer at the current pass count + /// + consteval void split() { + this->m_splitIndices.emplace_back(this->m_passes.size()); + } + + /// + /// Compute a pipeline signature + /// + /// @throws std::string describing the problem on failure + /// @return Pipeline signature + /// + consteval PipelineSignature finalize() { + PipelineSignature s{}; + + struct ShaderInfo { + std::string_view id; + bool hasHdrVariant{}; + size_t sampledImageBindings{}; // Only the amount suffices here + std::vector> storageImageBindings; + }; + std::vector shaderInfos; + + // Populate shader map with empty bindings + for (const auto& pass : this->m_passes) { + const auto it{std::ranges::find_if(shaderInfos, [&pass](const auto& shader) { + return shader.id == pass.shader; + })}; + const bool firstOccurrence{it == shaderInfos.end()}; + const bool isAggregatePass{pass.flags & PassFlag::Aggregate}; + + auto& shader{firstOccurrence ?
shaderInfos.emplace_back() : *it}; + + if (firstOccurrence) { + shader.id = pass.shader; + shader.hasHdrVariant = pass.flags & PassFlag::HdrVariant; + shader.sampledImageBindings = pass.inputs.size(); + shader.storageImageBindings.resize(pass.outputs.size()); + } + + // Ensure consistent usage across invocations + if (!firstOccurrence && !isAggregatePass) + throw "Shader \"" + std::string(pass.shader) + "\" is used by " + "multiple passes but does not have the Aggregate flag set"; + + if (shader.sampledImageBindings != pass.inputs.size()) + throw "Shader \"" + std::string(pass.shader) + "\" has " + "inconsistent read counts across passes"; + if (shader.storageImageBindings.size() != pass.outputs.size()) + throw "Shader \"" + std::string(pass.shader) + "\" has " + "inconsistent write counts across passes"; + + // Collect all used resources written by this shader + for (size_t i = 0; i < pass.outputs.size(); i++) { + const auto& resource{pass.outputs.at(i)}; + if (!resource.idx()) + continue; + + const auto& image{this->m_images.at(*resource.idx())}; + if (isAggregatePass && (image.flags & ImageFlag::Mipmaps) && !resource.layer()) + throw "Pass \"" + std::string(pass.shader) + "\" has " + "Aggregate flag but fully writes to an image with Mipmaps flag"; + + shader.storageImageBindings.at(i).push_back(*resource.idx()); + } + } + + // Create descriptors for all resources + for (size_t i = 0; i < this->m_images.size(); i++) { + const auto& image{this->m_images.at(i)}; + if (image.flags & ImageFlag::ExternalInput) { + s.descriptors.push_back({ + .type = BindingType::SampledImage, + .resources = { i } + }); + } + } + for (const auto& shader : shaderInfos) { + for (const auto& resources : shader.storageImageBindings) { + s.descriptors.push_back({ + .type = BindingType::StorageImage, + .resources = resources + }); + + // Skip sampled image bindings for external outputs + const auto& image{this->m_images.at(resources.front())}; + if (image.flags & ImageFlag::ExternalOutput) +
continue; + + s.descriptors.push_back({ + .type = BindingType::SampledImage, + .resources = resources + }); + } + } + + // Calculate pipeline stages by reordering passes with dependencies as constraints + std::vector writtenImages; + for (size_t i = 0; i < this->m_images.size(); i++) { + const auto& image{this->m_images.at(i)}; + if (image.flags & ImageFlag::ExternalInput) + writtenImages.push_back(i); + } + + std::vector remainingPasses(this->m_passes.size()); + std::iota(remainingPasses.begin(), remainingPasses.end(), 0); + + size_t currentStageIndex{0}; + std::pair currentStageBounds{ + 0, + this->m_splitIndices.empty() ? this->m_passes.size() : this->m_splitIndices.front() + }; + + while (!remainingPasses.empty()) { + auto& currentStage{s.stages.emplace_back()}; + + // Find all passes that may be executed next + std::vector validPasses{}; + for (const auto& passIdx : remainingPasses) { + if (passIdx < currentStageBounds.first || passIdx >= currentStageBounds.second) + continue; // Skip passes that are not in the current stage + + const auto& pass{this->m_passes.at(passIdx)}; + + bool isValid{true}; + for (const auto& image : pass.inputs) { + if (!image.idx()) + continue; + if (std::ranges::find(writtenImages, *image.idx()) != writtenImages.end()) + continue; + + isValid = false; + break; + } + + if (!isValid) + continue; + + validPasses.push_back(passIdx); + } + + // If no valid pass exists in the current stage, move on to the next stage + if (validPasses.empty() && currentStageIndex < this->m_splitIndices.size()) { + currentStageIndex++; + currentStageBounds = { + currentStageBounds.second, + currentStageIndex < this->m_splitIndices.size() ? 
+ this->m_splitIndices.at(currentStageIndex) : this->m_passes.size() + }; + + s.stages.pop_back(); + s.splitIndices.emplace_back(s.stages.size()); + continue; + } + + // Sort valid passes by shader name + auto begin = std::ranges::begin(validPasses); + auto end = std::ranges::end(validPasses); + for (auto i = begin; i != end; i++) { + std::rotate( + std::upper_bound(begin, i, *i, [this](size_t a, size_t b) { + return this->m_passes.at(a).shader < this->m_passes.at(b).shader; + }), + i, std::next(i) + ); + } + + // Merge passes into execution step + for (const auto& passIdx : validPasses) { + const auto& pass{this->m_passes.at(passIdx)}; + + for (const auto& resource : pass.outputs) { + if (!resource.idx()) + continue; + writtenImages.push_back(*resource.idx()); + } + + currentStage.passes.push_back(passIdx); + remainingPasses.erase(std::ranges::find(remainingPasses, passIdx)); + } + } + + // Calculate usage timeline for each image + for (size_t i = 0; i < this->m_images.size(); i++) { + auto& image{this->m_images.at(i)}; + if (image.flags & ImageFlag::Pinned) + continue; + + std::optional writeIndex; + std::optional readIndex; + + // Find the first stage that writes to the image and last stage that reads from it + for (size_t j = 0; j < s.stages.size(); j++) { + const auto& stage{s.stages.at(j)}; + + for (const auto& passIdx : stage.passes) { + const auto& pass{this->m_passes.at(passIdx)}; + + const bool isRead{ + std::ranges::any_of(pass.inputs, [i](const auto& resource) { + return resource.idx() && *resource.idx() == i; + }) + }; + const bool isWritten{ + std::ranges::any_of(pass.outputs, [i](const auto& resource) { + return resource.idx() && *resource.idx() == i; + }) + }; + + if (writeIndex && isWritten) + throw "Image " + std::to_string(i) + + " is written by multiple passes"; + if (isWritten && isRead) + throw "Image " + std::to_string(i) + + " is read & write in the same pass"; + + if (isWritten) + writeIndex.emplace(j); + if (isRead) + 
readIndex.emplace(std::max(readIndex.value_or(0), j)); + } + } + + if (!writeIndex) + throw "Image " + std::to_string(i) + " is not written to by any pass"; + if (!readIndex) + throw "Image " + std::to_string(i) + " is not read from by any pass"; + + image.lifetime = { *writeIndex, *readIndex }; + } + + // Copy remaining resources into signature + for (const auto& shader : shaderInfos) + s.shaders.emplace_back(shader.id, shader.hasHdrVariant); + for (const auto& image : this->m_images) + s.images.push_back(image); + for (const auto& pass : this->m_passes) + s.passes.push_back(pass); + return s; + } + private: + std::vector m_images; + std::vector m_passes; + std::vector m_splitIndices; + }; + +} diff --git a/lsfg-vk-backend/src/modules/pipeline/signature/helpers.hpp b/lsfg-vk-backend/src/modules/pipeline/signature/helpers.hpp new file mode 100644 index 0000000..b0f628b --- /dev/null +++ b/lsfg-vk-backend/src/modules/pipeline/signature/helpers.hpp @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace lsfgvk::pipeline { + + /// C++26 backported inplace_vector + template + class inplace_vector { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunsafe-buffer-usage" + public: + // Constructors + constexpr inplace_vector() = default; + constexpr inplace_vector(std::initializer_list init) { + if (init.size() > N) throw std::bad_alloc(); + for (auto& elem : init) + this->m_data.at(this->m_size++) = elem; + } + constexpr inplace_vector(const std::vector& vec) { + if (vec.size() > N) throw std::bad_alloc(); + for (const auto& elem : vec) + this->m_data.at(this->m_size++) = elem; + } + + // Appending elements + constexpr void push_back(const T& value) { + if (this->m_size >= N) throw std::bad_alloc(); + this->m_data.at(this->m_size++) = value; + } + constexpr void push_back(T&& value) { + if (this->m_size >= N) throw 
std::bad_alloc(); + this->m_data.at(this->m_size++) = std::move(value); + } + template + constexpr T& emplace_back(Args&&... args) { + if (this->m_size >= N) throw std::bad_alloc(); + this->m_data.at(this->m_size) = T(std::forward(args)...); + return this->m_data.at(this->m_size++); + } + constexpr void clear() { this->m_size = 0; } + + // Accessing elements + constexpr T& operator[](size_t idx) { return this->m_data.at(idx); } + constexpr const T& operator[](size_t idx) const { return this->m_data.at(idx); } + [[nodiscard]] constexpr T& at(size_t idx) { + if (idx >= this->m_size) throw std::out_of_range("Index out of range"); + return this->m_data.at(idx); + } + [[nodiscard]] constexpr const T& at(size_t idx) const { + if (idx >= this->m_size) throw std::out_of_range("Index out of range"); + return this->m_data.at(idx); + } + [[nodiscard]] constexpr T& front() { return this->m_data.front(); } + [[nodiscard]] constexpr const T& front() const { return this->m_data.front(); } + [[nodiscard]] constexpr T& back() { return this->m_data.at(this->m_size - 1); } + [[nodiscard]] constexpr const T& back() const { return this->m_data.at(this->m_size - 1); } + + // Iterating elements + [[nodiscard]] constexpr T* begin() { return this->m_data.data(); } + [[nodiscard]] constexpr const T* begin() const { return this->m_data.data(); } + [[nodiscard]] constexpr const T* cbegin() const { return this->m_data.data(); } + [[nodiscard]] constexpr T* end() { return this->m_data.data() + this->m_size; } // NOLINT (pointer arithmetic) + [[nodiscard]] constexpr const T* end() const { return this->m_data.data() + this->m_size; } // NOLINT (pointer arithmetic) + [[nodiscard]] constexpr const T* cend() const { return this->m_data.data() + this->m_size; } // NOLINT (pointer arithmetic) + + // Removing elements + constexpr void pop_back() { + if (this->m_size == 0) throw std::out_of_range("Vector is empty"); + this->m_size--; + } + + // Query capacity + [[nodiscard]] constexpr size_t size() 
const { return this->m_size; } + [[nodiscard]] constexpr size_t capacity() const { return N; } + [[nodiscard]] constexpr bool empty() const { return this->m_size == 0; } + private: + std::array m_data{}; + size_t m_size{0}; +#pragma clang diagnostic pop + }; + + /// Sequence of operations to apply to the base extent + class ExtentOp { + public: + /// Default constructor for no operations and no flow scaling + constexpr ExtentOp() = default; + /// Constructor for no operations aside from flow scale + constexpr ExtentOp(bool flow) : m_flow(flow) {} + /// Constructor for a single operation + constexpr ExtentOp(bool flow, uint32_t add, uint32_t shift) + : m_flow(flow), m_operations({{add, shift}}) {} + /// Constructor for a single operation starting from the flow base extent + constexpr ExtentOp(uint32_t add, uint32_t shift) + : m_flow(true), m_operations({{add, shift}}) {} + // Combine two extents (operations of other are appended; the flow flag of this is kept) + constexpr ExtentOp operator+(const ExtentOp& other) const { + ExtentOp result{*this}; + for (const auto& [add, shift] : other.m_operations) + result.m_operations.emplace_back(add, shift); + return result; + } + // Append the operations of another extent in place (returns a copy of *this, not a reference) + constexpr ExtentOp operator+=(const ExtentOp& other) { + for (const auto& [add, shift] : other.m_operations) + this->m_operations.emplace_back(add, shift); + return *this; + } + /// Get the flow flag + [[nodiscard]] constexpr auto flow() const { return this->m_flow; } + /// Get the operations + [[nodiscard]] constexpr const auto& operations() const { return this->m_operations; } + private: + bool m_flow{false}; + inplace_vector, 8> m_operations; + }; + +} diff --git a/lsfg-vk-backend/src/modules/pipeline/signature/image.hpp b/lsfg-vk-backend/src/modules/pipeline/signature/image.hpp new file mode 100644 index 0000000..b9aec58 --- /dev/null +++ b/lsfg-vk-backend/src/modules/pipeline/signature/image.hpp @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ + +#pragma once + +#include "helpers.hpp" + +#include +#include +#include +
+namespace lsfgvk::pipeline { + + /// All supported image formats + enum class Format : char { + /// Invalid format + Invalid = 0, + /// 8-bit unsigned normalized RGBA format + RGBA8888 = 37, // VK_FORMAT_R8G8B8A8_UNORM + /// 8-bit unsigned normalized R format + R8 = 9, // VK_FORMAT_R8_UNORM + /// 16-bit signed floating point RGBA format + RGBA16161616 = 97, // VK_FORMAT_R16G16B16A16_SFLOAT + }; + + /// All supported image flags + enum class ImageFlag : char { + /// No special flags + None = 0, + /// Instead of using a single image array, create several individual images with halving + /// extents for each mip level. + /// + /// This will cause the image to show up as Texture2D[], rather than Texture2DArray + /// and must therefore not be used in full with passes where the "Aggregate" flag is set. + Mipmaps = 1 << 0, + /// Indicate that the image is pinned & not transient + Pinned = 1 << 1, + /// Indicate that this image is written to externally + ExternalInput = 1 << 2, + /// Indicate that this image is read from externally + ExternalOutput = 1 << 3, + /// Indicate that a separate format should be used for HDR + HdrVariant = 1 << 4 + }; + + /// Helper type for operating on image flags + class ImageFlags { + public: + /// Default constructor + constexpr ImageFlags() = default; + /// Create from single image flag + constexpr ImageFlags(ImageFlag flag) : m_flags(static_cast(flag)) {} + /// Check whether any flag is set + constexpr operator bool() const { return m_flags != 0; } + /// Combine with another flag + constexpr ImageFlags operator|(ImageFlag flag) const { + return{this->m_flags | static_cast(flag)}; + } + /// Match with another flag + constexpr ImageFlags operator&(ImageFlag flag) const { + return{this->m_flags & static_cast(flag)}; + } + /// Match with another flag instance + constexpr ImageFlags operator&(ImageFlags other) const { + return{this->m_flags & other.m_flags}; + } + private: + int m_flags{static_cast(ImageFlag::None)}; + + // Create from number +
constexpr ImageFlags(int flags) : m_flags(flags) {} + }; + + /// Combine two image flags + constexpr ImageFlags operator|(ImageFlag lhs, ImageFlag rhs) { + return ImageFlags(lhs) | rhs; + } + + /// Signature for an image + struct ImageSignature { + /// Format of the image + Format format{ Format::RGBA8888 }; + /// Optional second format for HDR variants + Format hdrFormat{ Format::RGBA16161616 }; + /// Optional flags for the image + ImageFlags flags{ ImageFlag::None }; + /// Operation applied to the base extent for calculating the image extent + ExtentOp extentOp; + /// Number of layers in the image + uint32_t count{1}; + + /// Lifetime of the image as {writing stage, last reading stage} (set by pipeline builder) + std::pair lifetime; + }; + +} diff --git a/lsfg-vk-backend/src/modules/pipeline/signature/pass.hpp b/lsfg-vk-backend/src/modules/pipeline/signature/pass.hpp new file mode 100644 index 0000000..2a056eb --- /dev/null +++ b/lsfg-vk-backend/src/modules/pipeline/signature/pass.hpp @@ -0,0 +1,88 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ + +#pragma once + +#include "helpers.hpp" + +#include +#include +#include +#include + +namespace lsfgvk::pipeline { + + /// All supported pass flags + enum class PassFlag : char { + /// No special flags + None = 0, + /// Indicates the shader will be reused several times and resources must be + /// aggregated into arrays and indexed via push constants. + Aggregate = 1 << 0, + /// Indicate that the special flag is set via push constant.
+ Special = 1 << 1, + /// Indicate that there are two variants for 8-bit and 16-bit formats + HdrVariant = 1 << 2 + }; + + /// Helper type for operating on pass flags + class PassFlags { + public: + /// Default constructor + constexpr PassFlags() = default; + /// Create from single pass flag + constexpr PassFlags(PassFlag flag) : m_flags(static_cast(flag)) {} + /// Check whether any flag is set + constexpr operator bool() const { return m_flags != 0; } + /// Combine with another flag + constexpr PassFlags operator|(PassFlag flag) const { + return{this->m_flags | static_cast(flag)}; + } + /// Match with another flag + constexpr PassFlags operator&(PassFlag flag) const { + return{this->m_flags & static_cast(flag)}; + } + private: + int m_flags{static_cast(PassFlag::None)}; + + // Create from number + constexpr PassFlags(int flags) : m_flags(flags) {} + }; + + /// Combine two pass flags + constexpr PassFlags operator|(PassFlag lhs, PassFlag rhs) { + return PassFlags(lhs) | rhs; + } + + /// A pointer to an image, or a specific layer inside that image + class Resource { + public: + /// Default constructor (NOTE(review): m_idx is initialized to 0 rather than left empty; confirm this is intended) + constexpr Resource() = default; + /// Constructor for a full image + constexpr Resource(size_t idx) : m_idx(idx) {} + /// Constructor for a single layer + constexpr Resource(size_t idx, uint32_t layer) : m_idx(idx), m_layer(layer) {} + /// Get the image index + [[nodiscard]] constexpr auto idx() const { return this->m_idx; } + /// Get the layer (empty unless constructed for a single layer) + [[nodiscard]] constexpr auto layer() const { return this->m_layer; } + private: + std::optional m_idx{0}; + std::optional m_layer; + }; + + /// Signature of a shader pass + struct PassSignature { + /// Name of the shader + std::string_view shader; + /// Optional flags of this pass + PassFlags flags{ PassFlag::None }; + /// Resources to read from + inplace_vector inputs; + /// Resources to write to + inplace_vector outputs; + /// Operation applied to the base extent for calculating the dispatch extent + ExtentOp dispatchOp; +
}; + +} diff --git a/lsfg-vk-backend/src/shaderchains/alpha0.cpp b/lsfg-vk-backend/src/shaderchains/alpha0.cpp deleted file mode 100644 index c8229c2..0000000 --- a/lsfg-vk-backend/src/shaderchains/alpha0.cpp +++ /dev/null @@ -1,73 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#include "alpha0.hpp" -#include "../helpers/utils.hpp" -#include "lsfg-vk-common/helpers/pointers.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include -#include - -#include - -using namespace lsfgvk::backend; - -Alpha0::Alpha0(const Ctx& ctx, - const vk::Image& sourceImage) { - const size_t m = ctx.perf ? 1 : 2; // multiplier - const VkExtent2D halfExtent = backend::add_shift_extent(sourceImage.getExtent(), 1, 1); - const VkExtent2D quarterExtent = backend::add_shift_extent(halfExtent, 1, 1); - - // create temporary & output images - this->tempImages0.reserve(m); - this->tempImages1.reserve(m); - for (size_t i = 0; i < m; i++) { - this->tempImages0.emplace_back(ctx.vk, halfExtent); - this->tempImages1.emplace_back(ctx.vk, halfExtent); - } - - this->images.reserve(2 * m); - for (size_t i = 0; i < (2 * m); i++) - this->images.emplace_back(ctx.vk, quarterExtent); - - // create descriptor sets - const auto& shaders = ctx.perf ? 
ctx.shaders.get().performance : ctx.shaders.get().quality; - this->sets.reserve(3); - this->sets.emplace_back(ManagedShaderBuilder() - .sampled(sourceImage) - .storages(this->tempImages0) - .sampler(ctx.bnbSampler) - .build(ctx.vk, ctx.pool, shaders.alpha.at(0))); - this->sets.emplace_back(ManagedShaderBuilder() - .sampleds(this->tempImages0) - .storages(this->tempImages1) - .sampler(ctx.bnbSampler) - .build(ctx.vk, ctx.pool, shaders.alpha.at(1))); - this->sets.emplace_back(ManagedShaderBuilder() - .sampleds(this->tempImages1) - .storages(this->images) - .sampler(ctx.bnbSampler) - .build(ctx.vk, ctx.pool, shaders.alpha.at(2))); - - // store dispatch extents - this->dispatchExtent0 = backend::add_shift_extent(halfExtent, 7, 3); - this->dispatchExtent1 = backend::add_shift_extent(quarterExtent, 7, 3); -} - -void Alpha0::prepare(std::vector& images) const { - for (size_t i = 0; i < this->tempImages0.size(); i++) { - images.push_back(this->tempImages0.at(i).handle()); - images.push_back(this->tempImages1.at(i).handle()); - } - - for (const auto& image : this->images) - images.push_back(image.handle()); -} - -void Alpha0::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const { - this->sets.at(0).dispatch(vk, cmd, this->dispatchExtent0); - this->sets.at(1).dispatch(vk, cmd, this->dispatchExtent0); - this->sets.at(2).dispatch(vk, cmd, this->dispatchExtent1); -} diff --git a/lsfg-vk-backend/src/shaderchains/alpha0.hpp b/lsfg-vk-backend/src/shaderchains/alpha0.hpp deleted file mode 100644 index 5d48962..0000000 --- a/lsfg-vk-backend/src/shaderchains/alpha0.hpp +++ /dev/null @@ -1,48 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#pragma once - -#include "../helpers/managed_shader.hpp" -#include "../helpers/utils.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include - -#include - -namespace ctx { struct Ctx; } - -namespace lsfgvk::backend { - /// 
pre-alpha shaderchain - class Alpha0 { - public: - /// create a pre-alpha shaderchain - /// @param ctx context - /// @param sourceImage source image - Alpha0(const Ctx& ctx, - const vk::Image& sourceImage); - - /// prepare the shaderchain initially - /// @param images vector to fill with image handles - void prepare(std::vector& images) const; - - /// render the pre-alpha shaderchain - /// @param vk the vulkan instance - /// @param cmd command buffer - void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const; - - /// get the generated images - /// @return vector of images - [[nodiscard]] const auto& getImages() const { return this->images; } - private: - std::vector tempImages0; - std::vector tempImages1; - std::vector images; - - std::vector sets; - VkExtent2D dispatchExtent0{}; - VkExtent2D dispatchExtent1{}; - }; -} diff --git a/lsfg-vk-backend/src/shaderchains/alpha1.cpp b/lsfg-vk-backend/src/shaderchains/alpha1.cpp deleted file mode 100644 index 34bebf0..0000000 --- a/lsfg-vk-backend/src/shaderchains/alpha1.cpp +++ /dev/null @@ -1,54 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#include "alpha1.hpp" -#include "../helpers/utils.hpp" -#include "lsfg-vk-common/helpers/pointers.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include -#include - -#include - -using namespace lsfgvk::backend; - -Alpha1::Alpha1(const Ctx& ctx, size_t temporal, - const std::vector& sourceImages) { - const size_t m = ctx.perf ? 1 : 2; // multiplier - const VkExtent2D quarterExtent = sourceImages.at(0).getExtent(); - - // create output images for mod3 - this->images.reserve(temporal); - for(size_t i = 0; i < temporal; i++) { - auto& vec = this->images.emplace_back(); - - vec.reserve(2 * m); - for (size_t j = 0; j < (2 * m); j++) - vec.emplace_back(ctx.vk, quarterExtent); - } - - // create descriptor sets - const auto& shaders = ctx.perf ? 
ctx.shaders.get().performance : ctx.shaders.get().quality; - this->sets.reserve(temporal); - for (size_t i = 0; i < temporal; i++) - this->sets.emplace_back(ManagedShaderBuilder() - .sampleds(sourceImages) - .storages(this->images.at(i)) - .sampler(ctx.bnbSampler) - .build(ctx.vk, ctx.pool, shaders.alpha.at(3))); - - // store dispatch extents - this->dispatchExtent = backend::add_shift_extent(quarterExtent, 7, 3); -} - -void Alpha1::prepare(std::vector& images) const { - for (const auto& vec : this->images) - for (const auto& img : vec) - images.push_back(img.handle()); -} - -void Alpha1::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const { - this->sets.at(idx % this->sets.size()).dispatch(vk, cmd, dispatchExtent); -} diff --git a/lsfg-vk-backend/src/shaderchains/alpha1.hpp b/lsfg-vk-backend/src/shaderchains/alpha1.hpp deleted file mode 100644 index 47073e8..0000000 --- a/lsfg-vk-backend/src/shaderchains/alpha1.hpp +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#pragma once - -#include "../helpers/managed_shader.hpp" -#include "../helpers/utils.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include - -#include - -namespace ctx { struct Ctx; } - -namespace lsfgvk::backend { - /// alpha shaderchain - class Alpha1 { - public: - /// create a alpha shaderchain - /// @param ctx context - /// @param temporal temporal count - /// @param sourceImages source images - Alpha1(const Ctx& ctx, size_t temporal, - const std::vector& sourceImages); - - /// prepare the shaderchain initially - /// @param images vector to fill with image handles - void prepare(std::vector& images) const; - - /// render the alpha shaderchain - /// @param vk the vulkan instance - /// @param cmd command buffer - /// @param idx frame index - void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const; - - /// get the 
generated images - /// @return vector of images - [[nodiscard]] const auto& getImages() const { return this->images; } - private: - std::vector> images; - - std::vector sets; - VkExtent2D dispatchExtent{}; - }; -} diff --git a/lsfg-vk-backend/src/shaderchains/beta0.cpp b/lsfg-vk-backend/src/shaderchains/beta0.cpp deleted file mode 100644 index 300cef2..0000000 --- a/lsfg-vk-backend/src/shaderchains/beta0.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#include "beta0.hpp" -#include "../helpers/utils.hpp" -#include "lsfg-vk-common/helpers/pointers.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include -#include - -#include - -using namespace lsfgvk::backend; - -Beta0::Beta0(const Ctx& ctx, - const std::vector>& sourceImages) { - const VkExtent2D extent = sourceImages.at(0).at(0).getExtent(); - - // create output images - this->images.reserve(2); - for(size_t i = 0; i < 2; i++) - this->images.emplace_back(ctx.vk, extent); - - // create descriptor sets - const auto& shader = (ctx.perf ? 
- ctx.shaders.get().performance : ctx.shaders.get().quality).beta.at(0); - this->sets.reserve(sourceImages.size()); - for (size_t i = 0; i < sourceImages.size(); i++) - this->sets.emplace_back(ManagedShaderBuilder() - .sampleds(sourceImages.at((i + (sourceImages.size() - 2)) % sourceImages.size())) - .sampleds(sourceImages.at((i + (sourceImages.size() - 1)) % sourceImages.size())) - .sampleds(sourceImages.at(i % sourceImages.size())) - .storages(this->images) - .sampler(ctx.bnwSampler) - .build(ctx.vk, ctx.pool, shader)); - - // store dispatch extents - this->dispatchExtent = backend::add_shift_extent(extent, 7, 3); -} - -void Beta0::prepare(std::vector& images) const { - for (const auto& img : this->images) - images.push_back(img.handle()); -} - -void Beta0::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const { - this->sets.at(idx % this->sets.size()).dispatch(vk, cmd, dispatchExtent); -} diff --git a/lsfg-vk-backend/src/shaderchains/beta0.hpp b/lsfg-vk-backend/src/shaderchains/beta0.hpp deleted file mode 100644 index fcd9af7..0000000 --- a/lsfg-vk-backend/src/shaderchains/beta0.hpp +++ /dev/null @@ -1,46 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#pragma once - -#include "../helpers/managed_shader.hpp" -#include "../helpers/utils.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include - -#include - -namespace ctx { struct Ctx; } - -namespace lsfgvk::backend { - /// beta shaderchain - class Beta0 { - public: - /// create a beta shaderchain - /// @param ctx context - /// @param sourceImages source images - Beta0(const Ctx& ctx, - const std::vector>& sourceImages); - - /// prepare the shaderchain initially - /// @param images vector to fill with image handles - void prepare(std::vector& images) const; - - /// render the beta shaderchain - /// @param vk vulkan instance - /// @param cmd command buffer - /// @param idx frame 
index - void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const; - - /// get the generated images - /// @return vector of images - [[nodiscard]] const auto& getImages() const { return this->images; } - private: - std::vector images; - - std::vector sets; - VkExtent2D dispatchExtent{}; - }; -} diff --git a/lsfg-vk-backend/src/shaderchains/beta1.cpp b/lsfg-vk-backend/src/shaderchains/beta1.cpp deleted file mode 100644 index 2499656..0000000 --- a/lsfg-vk-backend/src/shaderchains/beta1.cpp +++ /dev/null @@ -1,81 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#include "beta1.hpp" -#include "../helpers/utils.hpp" -#include "lsfg-vk-common/helpers/pointers.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include -#include -#include - -#include - -using namespace lsfgvk::backend; - -Beta1::Beta1(const Ctx& ctx, - const std::vector& sourceImages) { - const VkExtent2D extent = sourceImages.at(0).getExtent(); - - // create temporary & output images - this->tempImages0.reserve(2); - this->tempImages1.reserve(2); - for(uint32_t i = 0; i < 2; i++) { - this->tempImages0.emplace_back(ctx.vk, extent); - this->tempImages1.emplace_back(ctx.vk, extent); - } - - this->images.reserve(6); - for (uint32_t i = 0; i < 6; i++) - this->images.emplace_back(ctx.vk, - backend::shift_extent(extent, i), - VK_FORMAT_R8_UNORM); - - // create descriptor sets - const auto& shaders = (ctx.perf ? 
- ctx.shaders.get().performance : ctx.shaders.get().quality).beta; - this->sets.reserve(4); - this->sets.emplace_back(ManagedShaderBuilder() - .sampleds(sourceImages) - .storages(this->tempImages0) - .sampler(ctx.bnbSampler) - .build(ctx.vk, ctx.pool, shaders.at(1))); - this->sets.emplace_back(ManagedShaderBuilder() - .sampleds(this->tempImages0) - .storages(this->tempImages1) - .sampler(ctx.bnbSampler) - .build(ctx.vk, ctx.pool, shaders.at(2))); - this->sets.emplace_back(ManagedShaderBuilder() - .sampleds(this->tempImages1) - .storages(this->tempImages0) - .sampler(ctx.bnbSampler) - .build(ctx.vk, ctx.pool, shaders.at(3))); - this->sets.emplace_back(ManagedShaderBuilder() - .sampleds(this->tempImages0) - .storages(this->images) - .sampler(ctx.bnbSampler) - .buffer(ctx.constantBuffer) - .build(ctx.vk, ctx.pool, shaders.at(4))); - - // store dispatch extents - this->dispatchExtent0 = backend::add_shift_extent(extent, 7, 3); - this->dispatchExtent1 = backend::add_shift_extent(extent, 31, 5); -} - -void Beta1::prepare(std::vector& images) const { - for (size_t i = 0; i < 2; i++) { - images.push_back(this->tempImages0.at(i).handle()); - images.push_back(this->tempImages1.at(i).handle()); - } - for (const auto& img : this->images) - images.push_back(img.handle()); -} - -void Beta1::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const { - this->sets.at(0).dispatch(vk, cmd, this->dispatchExtent0); - this->sets.at(1).dispatch(vk, cmd, this->dispatchExtent0); - this->sets.at(2).dispatch(vk, cmd, this->dispatchExtent0); - this->sets.at(3).dispatch(vk, cmd, this->dispatchExtent1); -} diff --git a/lsfg-vk-backend/src/shaderchains/beta1.hpp b/lsfg-vk-backend/src/shaderchains/beta1.hpp deleted file mode 100644 index ea27918..0000000 --- a/lsfg-vk-backend/src/shaderchains/beta1.hpp +++ /dev/null @@ -1,48 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#pragma once - -#include "../helpers/managed_shader.hpp" -#include "../helpers/utils.hpp" -#include 
"lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include - -#include - -namespace ctx { struct Ctx; } - -namespace lsfgvk::backend { - /// beta shaderchain - class Beta1 { - public: - /// create a beta shaderchain - /// @param ctx context - /// @param sourceImages source images - Beta1(const Ctx& ctx, - const std::vector& sourceImages); - - /// prepare the shaderchain initially - /// @param images vector to fill with image handles - void prepare(std::vector& images) const; - - /// render the beta shaderchain - /// @param vk the vulkan instance - /// @param cmd command buffer - void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const; - - /// get the generated images - /// @return vector of images - [[nodiscard]] const auto& getImages() const { return this->images; } - private: - std::vector tempImages0; - std::vector tempImages1; - std::vector images; - - std::vector sets; - VkExtent2D dispatchExtent0{}; - VkExtent2D dispatchExtent1{}; - }; -} diff --git a/lsfg-vk-backend/src/shaderchains/delta0.cpp b/lsfg-vk-backend/src/shaderchains/delta0.cpp deleted file mode 100644 index 8a5c34b..0000000 --- a/lsfg-vk-backend/src/shaderchains/delta0.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#include "delta0.hpp" -#include "../helpers/utils.hpp" -#include "lsfg-vk-common/helpers/pointers.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include -#include - -#include - -using namespace lsfgvk::backend; - -Delta0::Delta0(const Ctx& ctx, size_t idx, - const std::vector>& sourceImages, - const vk::Image& additionalInput0, - const vk::Image& additionalInput1) { - const size_t m = ctx.perf ? 
1 : 2; // multiplier - const VkExtent2D extent = sourceImages.at(0).at(0).getExtent(); - - // create output images - this->images0.reserve(3); - for(size_t i = 0; i < 3; i++) - this->images0.emplace_back(ctx.vk, extent); - this->images1.reserve(m); - for (size_t i = 0; i < m; i++) - this->images1.emplace_back(ctx.vk, extent); - - // create descriptor sets - const auto& shaders = (ctx.perf ? - ctx.shaders.get().performance : ctx.shaders.get().quality).delta; - - this->sets0.reserve(sourceImages.size()); - for (size_t i = 0; i < sourceImages.size(); i++) - this->sets0.emplace_back(ManagedShaderBuilder() - .sampleds(sourceImages.at((i + (sourceImages.size() - 1)) % sourceImages.size())) - .sampleds(sourceImages.at(i % sourceImages.size())) - .sampled(additionalInput0) - .storages(this->images0) - .sampler(ctx.bnwSampler) - .sampler(ctx.eabSampler) - .buffer(ctx.constantBuffers.at(idx)) - .build(ctx.vk, ctx.pool, shaders.at(0))); - - this->sets1.reserve(sourceImages.size()); - for (size_t i = 0; i < sourceImages.size(); i++) - this->sets1.emplace_back(ManagedShaderBuilder() - .sampleds(sourceImages.at((i + (sourceImages.size() - 1)) % sourceImages.size())) - .sampleds(sourceImages.at(i % sourceImages.size())) - .sampled(additionalInput1) - .sampled(additionalInput0) - .storages(this->images1) - .sampler(ctx.bnwSampler) - .sampler(ctx.eabSampler) - .buffer(ctx.constantBuffers.at(idx)) - .build(ctx.vk, ctx.pool, shaders.at(5))); - - // store dispatch extents - this->dispatchExtent = backend::add_shift_extent(extent, 7, 3); -} - -void Delta0::prepare(std::vector& images) const { - for (const auto& img : this->images0) - images.push_back(img.handle()); - for (const auto& img : this->images1) - images.push_back(img.handle()); -} - -void Delta0::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const { - this->sets0.at(idx % this->sets0.size()).dispatch(vk, cmd, dispatchExtent); - this->sets1.at(idx % this->sets1.size()).dispatch(vk, cmd, 
dispatchExtent); -} diff --git a/lsfg-vk-backend/src/shaderchains/delta0.hpp b/lsfg-vk-backend/src/shaderchains/delta0.hpp deleted file mode 100644 index 4c66780..0000000 --- a/lsfg-vk-backend/src/shaderchains/delta0.hpp +++ /dev/null @@ -1,57 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#pragma once - -#include "../helpers/managed_shader.hpp" -#include "../helpers/utils.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include - -#include - -namespace ctx { struct Ctx; } - -namespace lsfgvk::backend { - /// delta shaderchain - class Delta0 { - public: - /// create a delta shaderchain - /// @param ctx context - /// @param idx generated frame index - /// @param sourceImages source images - /// @param additionalInput0 additional input image - /// @param additionalInput1 additional input image - Delta0(const Ctx& ctx, size_t idx, - const std::vector>& sourceImages, - const vk::Image& additionalInput0, - const vk::Image& additionalInput1); - - /// prepare the shaderchain initially - /// @param images vector to fill with image handles - void prepare(std::vector& images) const; - - /// render the delta shaderchain - /// @param vk the vulkan instance - /// @param cmd command buffer - /// @param idx frame index - void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const; - - /// get the generated images - /// @return vector of images - [[nodiscard]] const auto& getImages0() const { return this->images0; } - - /// get the other generated images - /// @return vector of images - [[nodiscard]] const auto& getImages1() const { return this->images1; } - private: - std::vector images0; - std::vector images1; - - std::vector sets0; - std::vector sets1; - VkExtent2D dispatchExtent{}; - }; -} diff --git a/lsfg-vk-backend/src/shaderchains/delta1.cpp b/lsfg-vk-backend/src/shaderchains/delta1.cpp deleted file mode 100644 index 7c81902..0000000 --- 
a/lsfg-vk-backend/src/shaderchains/delta1.cpp +++ /dev/null @@ -1,110 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#include "delta1.hpp" -#include "../helpers/utils.hpp" -#include "lsfg-vk-common/helpers/pointers.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include -#include - -#include - -using namespace lsfgvk::backend; - -Delta1::Delta1(const Ctx& ctx, size_t idx, - const std::vector& sourceImages0, - const std::vector& sourceImages1, - const vk::Image& additionalInput0, - const vk::Image& additionalInput1, - const vk::Image& additionalInput2) { - const size_t m = ctx.perf ? 1 : 2; // multiplier - const VkExtent2D extent = sourceImages0.at(0).getExtent(); - - // create temporary & output images - for (size_t i = 0; i < (2 * m); i++) { - this->tempImages0.emplace_back(ctx.vk, extent); - this->tempImages1.emplace_back(ctx.vk, extent); - } - this->image0.emplace(ctx.vk, - VkExtent2D { extent.width, extent.height }, - VK_FORMAT_R16G16B16A16_SFLOAT - ); - this->image1.emplace(ctx.vk, - VkExtent2D { extent.width, extent.height }, - VK_FORMAT_R16G16B16A16_SFLOAT - ); - - // create descriptor sets - const auto& shaders = (ctx.perf ? 
- ctx.shaders.get().performance : ctx.shaders.get().quality).delta; - this->sets.reserve(4 + 4); - - this->sets.emplace_back(ManagedShaderBuilder() - .sampleds(sourceImages0) - .storages(this->tempImages0) - .sampler(ctx.bnbSampler) - .build(ctx.vk, ctx.pool, shaders.at(1))); - this->sets.emplace_back(ManagedShaderBuilder() - .sampleds(this->tempImages0) - .storages(this->tempImages1) - .sampler(ctx.bnbSampler) - .build(ctx.vk, ctx.pool, shaders.at(2))); - this->sets.emplace_back(ManagedShaderBuilder() - .sampleds(this->tempImages1) - .storages(this->tempImages0) - .sampler(ctx.bnbSampler) - .build(ctx.vk, ctx.pool, shaders.at(3))); - this->sets.emplace_back(ManagedShaderBuilder() - .sampleds(this->tempImages0) - .sampled(additionalInput0) - .sampled(additionalInput1) - .storage(*this->image0) - .sampler(ctx.bnbSampler) - .sampler(ctx.eabSampler) - .buffer(ctx.constantBuffers.at(idx)) - .build(ctx.vk, ctx.pool, shaders.at(4))); - - this->sets.emplace_back(ManagedShaderBuilder() - .sampleds(sourceImages1) - .storages(this->tempImages0, 0, m) - .sampler(ctx.bnbSampler) - .build(ctx.vk, ctx.pool, shaders.at(6))); - this->sets.emplace_back(ManagedShaderBuilder() - .sampleds(this->tempImages0, 0, m) - .storages(this->tempImages1, 0, m) - .sampler(ctx.bnbSampler) - .build(ctx.vk, ctx.pool, shaders.at(7))); - this->sets.emplace_back(ManagedShaderBuilder() - .sampleds(this->tempImages1, 0, m) - .storages(this->tempImages0, 0, m) - .sampler(ctx.bnbSampler) - .build(ctx.vk, ctx.pool, shaders.at(8))); - this->sets.emplace_back(ManagedShaderBuilder() - .sampleds(this->tempImages0, 0, m) - .sampled(additionalInput2) - .storage(*this->image1) - .sampler(ctx.bnbSampler) - .sampler(ctx.eabSampler) - .buffer(ctx.constantBuffers.at(idx)) - .build(ctx.vk, ctx.pool, shaders.at(9))); - - // store dispatch extents - this->dispatchExtent = backend::add_shift_extent(extent, 7, 3); -} - -void Delta1::prepare(std::vector& images) const { - for (size_t i = 0; i < this->tempImages0.size(); 
i++) { - images.push_back(this->tempImages0.at(i).handle()); - images.push_back(this->tempImages1.at(i).handle()); - } - images.push_back(this->image0->handle()); - images.push_back(this->image1->handle()); -} - -void Delta1::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const { - for (const auto& set : this->sets) - set.dispatch(vk, cmd, dispatchExtent); -} diff --git a/lsfg-vk-backend/src/shaderchains/delta1.hpp b/lsfg-vk-backend/src/shaderchains/delta1.hpp deleted file mode 100644 index 716df09..0000000 --- a/lsfg-vk-backend/src/shaderchains/delta1.hpp +++ /dev/null @@ -1,62 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#pragma once - -#include "../helpers/managed_shader.hpp" -#include "../helpers/utils.hpp" -#include "lsfg-vk-common/helpers/pointers.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include - -#include - -namespace ctx { struct Ctx; } - -namespace lsfgvk::backend { - /// gamma shaderchain - class Delta1 { - public: - /// create a gamma shaderchain - /// @param ctx context - /// @param idx generated frame index - /// @param sourceImages0 source images - /// @param sourceImages1 source images - /// @param additionalInput0 additional input image - /// @param additionalInput1 additional input image - /// @param additionalInput2 additional input image - Delta1(const Ctx& ctx, size_t idx, - const std::vector& sourceImages0, - const std::vector& sourceImages1, - const vk::Image& additionalInput0, - const vk::Image& additionalInput1, - const vk::Image& additionalInput2); - - /// prepare the shaderchain initially - /// @param images vector to fill with image handles - void prepare(std::vector& images) const; - - /// render the gamma shaderchain - /// @param vk the vulkan instance - /// @param cmd command buffer - void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const; - - /// get the first generated image - /// 
@return image - [[nodiscard]] const auto& getImage0() const { return *this->image0; } - - /// get the second generated image - /// @return image - [[nodiscard]] const auto& getImage1() const { return *this->image1; } - private: - std::vector tempImages0; - std::vector tempImages1; - ls::lazy image0; - ls::lazy image1; - - std::vector sets; - VkExtent2D dispatchExtent{}; - }; -} diff --git a/lsfg-vk-backend/src/shaderchains/gamma0.cpp b/lsfg-vk-backend/src/shaderchains/gamma0.cpp deleted file mode 100644 index df10fb8..0000000 --- a/lsfg-vk-backend/src/shaderchains/gamma0.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#include "gamma0.hpp" -#include "../helpers/utils.hpp" -#include "lsfg-vk-common/helpers/pointers.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include -#include - -#include - -using namespace lsfgvk::backend; - -Gamma0::Gamma0(const Ctx& ctx, size_t idx, - const std::vector>& sourceImages, - const vk::Image& additionalInput) { - const VkExtent2D extent = sourceImages.at(0).at(0).getExtent(); - - // create output images - this->images.reserve(3); - for(size_t i = 0; i < 3; i++) - this->images.emplace_back(ctx.vk, extent); - - // create descriptor sets - const auto& shader = (ctx.perf ? 
- ctx.shaders.get().performance : ctx.shaders.get().quality).gamma.at(0); - this->sets.reserve(sourceImages.size()); - for (size_t i = 0; i < sourceImages.size(); i++) - this->sets.emplace_back(ManagedShaderBuilder() - .sampleds(sourceImages.at((i + (sourceImages.size() - 1)) % sourceImages.size())) - .sampleds(sourceImages.at(i % sourceImages.size())) - .sampled(additionalInput) - .storages(this->images) - .sampler(ctx.bnwSampler) - .sampler(ctx.eabSampler) - .buffer(ctx.constantBuffers.at(idx)) - .build(ctx.vk, ctx.pool, shader)); - - // store dispatch extents - this->dispatchExtent = backend::add_shift_extent(extent, 7, 3); -} - -void Gamma0::prepare(std::vector& images) const { - for (const auto& img : this->images) - images.push_back(img.handle()); -} - -void Gamma0::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const { - this->sets.at(idx % this->sets.size()).dispatch(vk, cmd, dispatchExtent); -} diff --git a/lsfg-vk-backend/src/shaderchains/gamma0.hpp b/lsfg-vk-backend/src/shaderchains/gamma0.hpp deleted file mode 100644 index 87e9813..0000000 --- a/lsfg-vk-backend/src/shaderchains/gamma0.hpp +++ /dev/null @@ -1,49 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#pragma once - -#include "../helpers/managed_shader.hpp" -#include "../helpers/utils.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include - -#include - -namespace ctx { struct Ctx; } - -namespace lsfgvk::backend { - /// gamma shaderchain - class Gamma0 { - public: - /// create a gamma shaderchain - /// @param ctx context - /// @param idx generated frame index - /// @param sourceImages source images - /// @param additionalInput additional input image - Gamma0(const Ctx& ctx, size_t idx, - const std::vector>& sourceImages, - const vk::Image& additionalInput); - - /// prepare the shaderchain initially - /// @param images vector to fill with image handles - void 
prepare(std::vector& images) const; - - /// render the gamma shaderchain - /// @param vk the vulkan instance - /// @param cmd command buffer - /// @param idx frame index - void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const; - - /// get the generated images - /// @return vector of images - [[nodiscard]] const auto& getImages() const { return this->images; } - private: - std::vector images; - - std::vector sets; - VkExtent2D dispatchExtent{}; - }; -} diff --git a/lsfg-vk-backend/src/shaderchains/gamma1.cpp b/lsfg-vk-backend/src/shaderchains/gamma1.cpp deleted file mode 100644 index 2a7e90f..0000000 --- a/lsfg-vk-backend/src/shaderchains/gamma1.cpp +++ /dev/null @@ -1,78 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#include "gamma1.hpp" -#include "../helpers/utils.hpp" -#include "lsfg-vk-common/helpers/pointers.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include -#include - -#include - -using namespace lsfgvk::backend; - -Gamma1::Gamma1(const Ctx& ctx, size_t idx, - const std::vector& sourceImages, - const vk::Image& additionalInput0, - const vk::Image& additionalInput1) { - const size_t m = ctx.perf ? 1 : 2; // multiplier - const VkExtent2D extent = sourceImages.at(0).getExtent(); - - // create temporary & output images - for (size_t i = 0; i < (2 * m); i++) { - this->tempImages0.emplace_back(ctx.vk, extent); - this->tempImages1.emplace_back(ctx.vk, extent); - } - this->image.emplace(ctx.vk, - VkExtent2D { extent.width, extent.height }, - VK_FORMAT_R16G16B16A16_SFLOAT - ); - - // create descriptor sets - const auto& shaders = (ctx.perf ? 
- ctx.shaders.get().performance : ctx.shaders.get().quality).gamma; - this->sets.reserve(4); - this->sets.emplace_back(ManagedShaderBuilder() - .sampleds(sourceImages) - .storages(this->tempImages0) - .sampler(ctx.bnbSampler) - .build(ctx.vk, ctx.pool, shaders.at(1))); - this->sets.emplace_back(ManagedShaderBuilder() - .sampleds(this->tempImages0) - .storages(this->tempImages1) - .sampler(ctx.bnbSampler) - .build(ctx.vk, ctx.pool, shaders.at(2))); - this->sets.emplace_back(ManagedShaderBuilder() - .sampleds(this->tempImages1) - .storages(this->tempImages0) - .sampler(ctx.bnbSampler) - .build(ctx.vk, ctx.pool, shaders.at(3))); - this->sets.emplace_back(ManagedShaderBuilder() - .sampleds(this->tempImages0) - .sampled(additionalInput0) - .sampled(additionalInput1) - .storage(*this->image) - .sampler(ctx.bnbSampler) - .sampler(ctx.eabSampler) - .buffer(ctx.constantBuffers.at(idx)) - .build(ctx.vk, ctx.pool, shaders.at(4))); - - // store dispatch extents - this->dispatchExtent = backend::add_shift_extent(extent, 7, 3); -} - -void Gamma1::prepare(std::vector& images) const { - for (size_t i = 0; i < this->tempImages0.size(); i++) { - images.push_back(this->tempImages0.at(i).handle()); - images.push_back(this->tempImages1.at(i).handle()); - } - images.push_back(this->image->handle()); -} - -void Gamma1::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const { - for (const auto& set : this->sets) - set.dispatch(vk, cmd, dispatchExtent); -} diff --git a/lsfg-vk-backend/src/shaderchains/gamma1.hpp b/lsfg-vk-backend/src/shaderchains/gamma1.hpp deleted file mode 100644 index 2104cc7..0000000 --- a/lsfg-vk-backend/src/shaderchains/gamma1.hpp +++ /dev/null @@ -1,53 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#pragma once - -#include "../helpers/managed_shader.hpp" -#include "../helpers/utils.hpp" -#include "lsfg-vk-common/helpers/pointers.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include 
"lsfg-vk-common/vulkan/vulkan.hpp" - -#include - -#include - -namespace ctx { struct Ctx; } - -namespace lsfgvk::backend { - /// gamma shaderchain - class Gamma1 { - public: - /// create a gamma shaderchain - /// @param ctx context - /// @param idx generated frame index - /// @param sourceImages source images - /// @param additionalInput0 additional input image - /// @param additionalInput1 additional input image - Gamma1(const Ctx& ctx, size_t idx, - const std::vector& sourceImages, - const vk::Image& additionalInput0, - const vk::Image& additionalInput1); - - /// prepare the shaderchain initially - /// @param images vector to fill with image handles - void prepare(std::vector& images) const; - - /// render the gamma shaderchain - /// @param vk the vulkan instance - /// @param cmd command buffer - void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const; - - /// get the generated image - /// @return image - [[nodiscard]] const auto& getImage() const { return *this->image; } - private: - std::vector tempImages0; - std::vector tempImages1; - ls::lazy image; - - std::vector sets; - VkExtent2D dispatchExtent{}; - }; -} diff --git a/lsfg-vk-backend/src/shaderchains/generate.cpp b/lsfg-vk-backend/src/shaderchains/generate.cpp deleted file mode 100644 index 2375ccf..0000000 --- a/lsfg-vk-backend/src/shaderchains/generate.cpp +++ /dev/null @@ -1,57 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#include "generate.hpp" -#include "../helpers/utils.hpp" -#include "lsfg-vk-common/helpers/pointers.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include -#include -#include - -#include - -using namespace lsfgvk::backend; - -Generate::Generate(const Ctx& ctx, size_t idx, - const std::pair& sourceImages, - const vk::Image& inputImage1, - const vk::Image& inputImage2, - const vk::Image& inputImage3, - const vk::Image& outputImage) { - // create descriptor 
sets - const auto& shader = ctx.hdr ? - ctx.shaders.get().generate_hdr : ctx.shaders.get().generate; - this->sets.reserve(2); - this->sets.emplace_back(ManagedShaderBuilder() - .sampled(sourceImages.second) - .sampled(sourceImages.first) - .sampled(inputImage1) - .sampled(inputImage2) - .sampled(inputImage3) - .storage(outputImage) - .sampler(ctx.bnbSampler) - .sampler(ctx.eabSampler) - .buffer(ctx.constantBuffers.at(idx)) - .build(ctx.vk, ctx.pool, shader)); - this->sets.emplace_back(ManagedShaderBuilder() - .sampled(sourceImages.first) - .sampled(sourceImages.second) - .sampled(inputImage1) - .sampled(inputImage2) - .sampled(inputImage3) - .storage(outputImage) - .sampler(ctx.bnbSampler) - .sampler(ctx.eabSampler) - .buffer(ctx.constantBuffers.at(idx)) - .build(ctx.vk, ctx.pool, shader)); - - // store dispatch extent - this->dispatchExtent = backend::add_shift_extent(ctx.sourceExtent, 15, 4); -} - -void Generate::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const { - this->sets.at(idx % 2).dispatch(vk, cmd, this->dispatchExtent); -} diff --git a/lsfg-vk-backend/src/shaderchains/generate.hpp b/lsfg-vk-backend/src/shaderchains/generate.hpp deleted file mode 100644 index 5e0b349..0000000 --- a/lsfg-vk-backend/src/shaderchains/generate.hpp +++ /dev/null @@ -1,45 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#pragma once - -#include "../helpers/managed_shader.hpp" -#include "../helpers/utils.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include -#include - -#include - -namespace ctx { struct Ctx; } - -namespace lsfgvk::backend { - /// generate shaderchain - class Generate { - public: - /// create a generate shaderchain - /// @param ctx context - /// @param idx generated frame index - /// @param sourceImages pair of source images - /// @param inputImage1 input image 1 - /// @param inputImage2 input image 2 - /// @param 
inputImage3 input image 3 - Generate(const Ctx& ctx, size_t idx, - const std::pair& sourceImages, - const vk::Image& inputImage1, - const vk::Image& inputImage2, - const vk::Image& inputImage3, - const vk::Image& outputImage); - - /// render the generate shaderchain - /// @param vk the vulkan instance - /// @param cmd command buffer - /// @param idx frame index - void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const; - private: - std::vector sets; - VkExtent2D dispatchExtent{}; - }; -} diff --git a/lsfg-vk-backend/src/shaderchains/mipmaps.cpp b/lsfg-vk-backend/src/shaderchains/mipmaps.cpp deleted file mode 100644 index 2f04669..0000000 --- a/lsfg-vk-backend/src/shaderchains/mipmaps.cpp +++ /dev/null @@ -1,53 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#include "mipmaps.hpp" -#include "../helpers/utils.hpp" -#include "lsfg-vk-common/helpers/pointers.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include -#include -#include -#include - -#include - -using namespace lsfgvk::backend; - -Mipmaps::Mipmaps(const Ctx& ctx, - const std::pair& sourceImages) { - // create output images for base and 6 mips - this->images.reserve(7); - for (uint32_t i = 0; i < 7; i++) - this->images.emplace_back(ctx.vk, - backend::shift_extent(ctx.flowExtent, i), VK_FORMAT_R8_UNORM); - - // create descriptor sets for both input images - this->sets.reserve(2); - this->sets.emplace_back(ManagedShaderBuilder() - .sampled(sourceImages.first) - .storages(this->images) - .sampler(ctx.bnbSampler) - .buffer(ctx.constantBuffer) - .build(ctx.vk, ctx.pool, ctx.shaders.get().mipmaps)); - this->sets.emplace_back(ManagedShaderBuilder() - .sampled(sourceImages.second) - .storages(this->images) - .sampler(ctx.bnbSampler) - .buffer(ctx.constantBuffer) - .build(ctx.vk, ctx.pool, ctx.shaders.get().mipmaps)); - - // store dispatch extent - this->dispatchExtent = 
backend::add_shift_extent(ctx.flowExtent, 63, 6); -} - -void Mipmaps::prepare(std::vector& images) const { - for (const auto& img : this->images) - images.push_back(img.handle()); -} - -void Mipmaps::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const { - this->sets.at(idx % 2).dispatch(vk, cmd, this->dispatchExtent); -} diff --git a/lsfg-vk-backend/src/shaderchains/mipmaps.hpp b/lsfg-vk-backend/src/shaderchains/mipmaps.hpp deleted file mode 100644 index 8c1f6a2..0000000 --- a/lsfg-vk-backend/src/shaderchains/mipmaps.hpp +++ /dev/null @@ -1,47 +0,0 @@ -/* SPDX-License-Identifier: GPL-3.0-or-later */ - -#pragma once - -#include "../helpers/managed_shader.hpp" -#include "../helpers/utils.hpp" -#include "lsfg-vk-common/vulkan/command_buffer.hpp" -#include "lsfg-vk-common/vulkan/image.hpp" -#include "lsfg-vk-common/vulkan/vulkan.hpp" - -#include -#include - -#include - -namespace ctx { struct Ctx; } - -namespace lsfgvk::backend { - /// mipmaps shaderchain - class Mipmaps { - public: - /// create a mipmaps shaderchain - /// @param ctx context - /// @param sourceImages pair of source images - Mipmaps(const Ctx& ctx, - const std::pair& sourceImages); - - /// prepare the shaderchain initially - /// @param images vector to fill with image handles - void prepare(std::vector& images) const; - - /// render the mipmaps shaderchain - /// @param vk the vulkan instance - /// @param cmd command buffer - /// @param idx frame index - void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const; - - /// get the generated mipmap images - /// @return vector of images - [[nodiscard]] const auto& getImages() const { return this->images; } - private: - std::vector images; - - std::vector sets; - VkExtent2D dispatchExtent{}; - }; -} diff --git a/lsfg-vk-backend/src/utility/pipelines.cpp b/lsfg-vk-backend/src/utility/pipelines.cpp new file mode 100644 index 0000000..60ef915 --- /dev/null +++ b/lsfg-vk-backend/src/utility/pipelines.cpp @@ -0,0 
+1,578 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ + +#include "pipelines.hpp" +#include "modules/pipeline/signature.hpp" +#include "modules/pipeline/signature/helpers.hpp" +#include "modules/pipeline/signature/image.hpp" +#include "modules/pipeline/signature/pass.hpp" + +#include +#include +#include + +using namespace lsfgvk; + +namespace { + using namespace lsfgvk::pipeline; + + /// Build the pipeline signature + consteval PipelineSignature buildPipelineSignature(bool perf) { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wshadow" + PipelineSignatureBuilder s; + + const Resource INVALID{}; + + auto sourceImageArray = s.registerImage({ + .format = Format::RGBA8888, + .hdrFormat = Format::RGBA16161616, + .flags = ImageFlag::Pinned | + ImageFlag::ExternalInput | + ImageFlag::HdrVariant, + .count = 2 + }); + + /* Pre-pass */ + + auto mipmapImageArray = s.registerImage({ + .format = Format::R8, + .flags = ImageFlag::Mipmaps, + .extentOp = { true }, + .count = 7 + }); + + s.appendPass({ + .shader = "mipmaps", + .inputs{ + sourceImageArray + }, + .outputs{ + mipmapImageArray + }, + .dispatchOp = { 63, 6 } + }); + + std::vector alphaArray(7); + std::vector alphaExtents(7); + for (uint32_t i = 0; i < 7; i++) { + const uint32_t mul = perf ? 
1 : 2; + const ExtentOp dispatch = { 7, 3 }; + + ExtentOp extent = { 0, 6 - i }; + extent += { 1, 1 }; + + auto flipflop0 = s.registerImage({ + .format = Format::RGBA8888, + .extentOp = extent, + .count = 1 * mul + }); + + s.appendPass({ + .shader = "alpha0", + .flags = PassFlag::Aggregate, + .inputs{ + { mipmapImageArray, 6 - i } + }, + .outputs{ + flipflop0 + }, + .dispatchOp = extent + dispatch + }); + + auto flipflop1 = s.registerImage({ + .format = Format::RGBA8888, + .extentOp = extent, + .count = 1 * mul + }); + + s.appendPass({ + .shader = "alpha1", + .flags = PassFlag::Aggregate, + .inputs{ + flipflop0 + }, + .outputs{ + flipflop1 + }, + .dispatchOp = extent + dispatch + }); + + extent += { 1, 1 }; + + auto flipflop2 = s.registerImage({ + .format = Format::RGBA8888, + .extentOp = extent, + .count = 2 * mul + }); + + s.appendPass({ + .shader = "alpha2", + .flags = PassFlag::Aggregate, + .inputs{ + flipflop1 + }, + .outputs{ + flipflop2 + }, + .dispatchOp = extent + dispatch + }); + + auto result = s.registerImage({ + .format = Format::RGBA8888, + .flags = ImageFlag::Pinned, + .extentOp = extent, + .count = (2 * mul) * 3 + }); + + s.appendPass({ + .shader = "alpha3", + .flags = PassFlag::Aggregate, + .inputs{ + flipflop2 + }, + .outputs{ + result + }, + .dispatchOp = extent + dispatch + }); + + alphaArray.at(6 - i) = result; + alphaExtents.at(6 - i) = extent; + } + + ExtentOp extent = alphaExtents.at(0); + ExtentOp dispatch = { 7, 3 }; + + auto flipflop0 = s.registerImage({ + .format = Format::RGBA8888, + .extentOp = extent, + .count = 2 + }); + + s.appendPass({ + .shader = "beta0", + .inputs{ + alphaArray.at(0) + }, + .outputs{ + flipflop0 + }, + .dispatchOp = extent + dispatch + }); + + auto flipflop1 = s.registerImage({ + .format = Format::RGBA8888, + .extentOp = extent, + .count = 2 + }); + + s.appendPass({ + .shader = "beta1", + .inputs{ + flipflop0 + }, + .outputs{ + flipflop1 + }, + .dispatchOp = extent + dispatch + }); + + auto flipflop2 = 
s.registerImage({ + .format = Format::RGBA8888, + .extentOp = extent, + .count = 2 + }); + + s.appendPass({ + .shader = "beta2", + .inputs{ + flipflop1 + }, + .outputs{ + flipflop2 + }, + .dispatchOp = extent + dispatch + }); + + auto flipflop3 = s.registerImage({ + .format = Format::RGBA8888, + .extentOp = extent, + .count = 2 + }); + + s.appendPass({ + .shader = "beta3", + .inputs{ + flipflop2 + }, + .outputs{ + flipflop3 + }, + .dispatchOp = extent + dispatch + }); + + auto betaImageArray = s.registerImage({ + .format = Format::R8, + .flags = ImageFlag::Mipmaps, + .extentOp = extent, + .count = 6 + }); + + dispatch = { 31, 5 }; + + s.appendPass({ + .shader = "beta4", + .inputs{ + flipflop3 + }, + .outputs{ + betaImageArray + }, + .dispatchOp = extent + dispatch + }); + + /* Main-pass */ + + s.split(); + + std::vector gammaArray(7); + std::vector deltaArray(3); + std::vector epsilonArray(3); + for (uint32_t i = 0; i < 7; i++) { + const uint32_t mul = perf ? 1 : 2; + const ExtentOp dispatch = { 7, 3 }; + const ExtentOp extent = alphaExtents.at(6 - i); + + auto flipflop0 = s.registerImage({ + .format = Format::RGBA8888, + .extentOp = extent, + .count = 3 + }); + + s.appendPass({ + .shader = "gamma0", + .flags = PassFlag::Aggregate + | (i == 0 ? PassFlag::Special : PassFlag::None), + .inputs{ + alphaArray.at(6 - i), + i == 0 ? 
INVALID : gammaArray.at(i - 1) + }, + .outputs{ + flipflop0 + }, + .dispatchOp = extent + dispatch + }); + + auto flipflop1 = s.registerImage({ + .format = Format::RGBA8888, + .extentOp = extent, + .count = 2 * mul + }); + + s.appendPass({ + .shader = "gamma1", + .flags = PassFlag::Aggregate, + .inputs{ + flipflop0 + }, + .outputs{ + flipflop1 + }, + .dispatchOp = extent + dispatch + }); + + auto flipflop2 = s.registerImage({ + .format = Format::RGBA8888, + .extentOp = extent, + .count = 2 * mul + }); + + s.appendPass({ + .shader = "gamma2", + .flags = PassFlag::Aggregate, + .inputs{ + flipflop1 + }, + .outputs{ + flipflop2 + }, + .dispatchOp = extent + dispatch + }); + + auto flipflop3 = s.registerImage({ + .format = Format::RGBA8888, + .extentOp = extent, + .count = 2 * mul + }); + + s.appendPass({ + .shader = "gamma3", + .flags = PassFlag::Aggregate, + .inputs{ + flipflop2 + }, + .outputs{ + flipflop3 + }, + .dispatchOp = extent + dispatch + }); + + auto result = s.registerImage({ + .format = Format::RGBA16161616, + .extentOp = extent + }); + + s.appendPass({ + .shader = "gamma4", + .flags = PassFlag::Aggregate + | (i == 0 ? PassFlag::Special : PassFlag::None), + .inputs{ + flipflop3, + i == 0 ? INVALID : gammaArray.at(i - 1), + { betaImageArray, i == 0 ? 5 : (6 - i) } + }, + .outputs{ + result + }, + .dispatchOp = extent + dispatch + }); + + gammaArray.at(i) = result; + + if (i >= 4) { + auto flipflop0 = s.registerImage({ + .format = Format::RGBA8888, + .extentOp = extent, + .count = 3 + }); + + s.appendPass({ + .shader = "delta0", + .flags = PassFlag::Aggregate + | (i == 4 ? PassFlag::Special : PassFlag::None), + .inputs{ + alphaArray.at(6 - i), + i == 4 ? 
INVALID : deltaArray.at(i - 5) + }, + .outputs{ + flipflop0 + }, + .dispatchOp = extent + dispatch + }); + + auto flipflop1 = s.registerImage({ + .format = Format::RGBA8888, + .extentOp = extent, + .count = 2 * mul + }); + + s.appendPass({ + .shader = "delta1", + .flags = PassFlag::Aggregate, + .inputs{ + flipflop0 + }, + .outputs{ + flipflop1 + }, + .dispatchOp = extent + dispatch + }); + + auto flipflop2 = s.registerImage({ + .format = Format::RGBA8888, + .extentOp = extent, + .count = 2 * mul + }); + + s.appendPass({ + .shader = "delta2", + .flags = PassFlag::Aggregate, + .inputs{ + flipflop1 + }, + .outputs{ + flipflop2 + }, + .dispatchOp = extent + dispatch + }); + + auto flipflop3 = s.registerImage({ + .format = Format::RGBA8888, + .extentOp = extent, + .count = 2 * mul + }); + + s.appendPass({ + .shader = "delta3", + .flags = PassFlag::Aggregate, + .inputs{ + flipflop2 + }, + .outputs{ + flipflop3 + }, + .dispatchOp = extent + dispatch + }); + + auto result = s.registerImage({ + .format = Format::RGBA16161616, + .extentOp = extent, + .count = 1 + }); + + s.appendPass({ + .shader = "delta4", + .flags = PassFlag::Aggregate + | (i == 4 ? PassFlag::Special : PassFlag::None), + .inputs{ + flipflop3, + i == 4 ? INVALID : deltaArray.at(i - 5), + { betaImageArray, 6 - i } + }, + .outputs{ + result + }, + .dispatchOp = extent + dispatch + }); + + deltaArray.at(i - 4) = result; + } + + if (i >= 4) { + auto flipflop0 = s.registerImage({ + .format = Format::RGBA8888, + .extentOp = extent, + .count = mul + }); + + s.appendPass({ + .shader = "epsilon0", + .flags = PassFlag::Aggregate + | (i == 4 ? PassFlag::Special : PassFlag::None), + .inputs{ + alphaArray.at(6 - i), + gammaArray.at(i - 1), + i == 4 ? 
INVALID : deltaArray.at(i - 5) + }, + .outputs{ + flipflop0 + }, + .dispatchOp = extent + dispatch + }); + + auto flipflop1 = s.registerImage({ + .format = Format::RGBA8888, + .extentOp = extent, + .count = mul + }); + + s.appendPass({ + .shader = "epsilon1", + .flags = PassFlag::Aggregate, + .inputs{ + flipflop0 + }, + .outputs{ + flipflop1 + }, + .dispatchOp = extent + dispatch + }); + + auto flipflop2 = s.registerImage({ + .format = Format::RGBA8888, + .extentOp = extent, + .count = mul + }); + + s.appendPass({ + .shader = "epsilon2", + .flags = PassFlag::Aggregate, + .inputs{ + flipflop1 + }, + .outputs{ + flipflop2 + }, + .dispatchOp = extent + dispatch + }); + + auto flipflop3 = s.registerImage({ + .format = Format::RGBA8888, + .extentOp = extent, + .count = mul + }); + + s.appendPass({ + .shader = "epsilon3", + .flags = PassFlag::Aggregate, + .inputs{ + flipflop2 + }, + .outputs{ + flipflop3 + }, + .dispatchOp = extent + dispatch + }); + + auto result = s.registerImage({ + .format = Format::RGBA16161616, + .extentOp = extent, + .count = 1 + }); + + s.appendPass({ + .shader = "epsilon4", + .flags = PassFlag::Aggregate + | (i == 4 ? PassFlag::Special : PassFlag::None), + .inputs{ + flipflop3, + i == 4 ? 
INVALID : epsilonArray.at(i - 5) + }, + .outputs{ + result + }, + .dispatchOp = extent + dispatch + }); + + epsilonArray.at(i - 4) = result; + } + } + + extent = { false }; + dispatch = { 15, 4 }; + + auto result = s.registerImage({ + .format = Format::RGBA8888, + .hdrFormat = Format::RGBA16161616, + .flags = ImageFlag::Pinned + | ImageFlag::ExternalOutput + | ImageFlag::HdrVariant, + .extentOp = extent, + .count = 1 + }); + + s.appendPass({ + .shader = "generate", + .flags = PassFlag::HdrVariant, + .inputs{ + sourceImageArray, + gammaArray.at(6), + deltaArray.at(2), + epsilonArray.at(2) + }, + .outputs{ + result + }, + .dispatchOp = extent + dispatch + }); + + return s.finalize(); +#pragma clang diagnostic pop + } +} + +const PipelineSignature& lsfgvk::getPipelineSignature(bool perf) { + static const PipelineSignature signature = buildPipelineSignature(false); + static const PipelineSignature perfSignature = buildPipelineSignature(true); + return perf ? perfSignature : signature; +} diff --git a/lsfg-vk-backend/src/utility/pipelines.hpp b/lsfg-vk-backend/src/utility/pipelines.hpp new file mode 100644 index 0000000..6f15e4e --- /dev/null +++ b/lsfg-vk-backend/src/utility/pipelines.hpp @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ + +#pragma once + +#include "modules/pipeline/signature.hpp" + +namespace lsfgvk { + + /// + /// Get the pipeline signature + /// + /// @param perf Performance mode + /// @return Pipeline signature + /// + const pipeline::PipelineSignature& getPipelineSignature(bool perf); + +} diff --git a/lsfg-vk-backend/src/utility/vkhelper.cpp b/lsfg-vk-backend/src/utility/vkhelper.cpp new file mode 100644 index 0000000..499b9c6 --- /dev/null +++ b/lsfg-vk-backend/src/utility/vkhelper.cpp @@ -0,0 +1,656 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ + +#include "vkhelper.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include + +/* Device initialization */ + +vk::UniqueInstance vkhelper::createInstance(vk::detail::DispatchLoaderDynamic& dld) { + dld.init(); + + const vk::ApplicationInfo appInfo{ + .pApplicationName = "lsfg-vk", + .applicationVersion = vk::makeVersion(2, 0, 0), + .pEngineName = "lsfg-vk", + .engineVersion = vk::makeVersion(2, 0, 0), + .apiVersion = vk::ApiVersion12 // Fully supported by all Vulkan-capable GPUs + }; + const vk::InstanceCreateInfo instanceInfo{ + .pApplicationInfo = &appInfo + }; + auto instance{vk::createInstanceUnique(instanceInfo, nullptr, dld)}; + dld.init(*instance); + + return instance; +} + +vk::PhysicalDevice vkhelper::findPhysicalDevice( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Instance& instance, + const std::string& id +) { + for (const auto& physdev : instance.enumeratePhysicalDevices(dld)) { + // Check for VK_EXT_pci_bus_info + bool supportsPCIEXT{false}; + for (const auto& ext : physdev.enumerateDeviceExtensionProperties(nullptr, dld)) { + if (std::string(ext.extensionName) != vk::EXTPciBusInfoExtensionName) + continue; + + supportsPCIEXT = true; + break; + } + + // Fetch properties + vk::PhysicalDevicePCIBusInfoPropertiesEXT busInfo{}; + vk::PhysicalDeviceProperties2 info{ + .pNext = supportsPCIEXT ? 
&busInfo : nullptr + }; + physdev.getProperties2(&info, dld); + + auto& props{info.properties}; + + // Check first if id is not given + if (id.empty()) + return physdev; + + // Compare device name + props.deviceName.back() = '\0'; // Ensure null-termination + if (id == std::string(props.deviceName)) + return physdev; + + // Compare Vendor ID + Device ID + std::ostringstream gpuss; + gpuss << std::hex << std::setfill('0') + << std::setw(4) << props.vendorID << ":" + << std::setw(4) << props.deviceID; + if (id == gpuss.str()) + return physdev; + + // Compare PCI bus ID + if (!supportsPCIEXT) + continue; + + std::ostringstream pciss; + pciss << std::hex << std::setfill('0') + << std::setw(4) << busInfo.pciDomain << ":" + << std::setw(2) << busInfo.pciBus << ":" + << std::setw(2) << busInfo.pciDevice << "." + << std::setw(1) << busInfo.pciFunction; + if (id == pciss.str()) + return physdev; + } + + throw std::runtime_error("No physical device matching '" + id + "' found"); +} + +uint32_t vkhelper::findComputeQueueFamilyIndex( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::PhysicalDevice& physdev +) { + uint32_t idx{0}; + for (const auto& qfi : physdev.getQueueFamilyProperties2(dld)) { + if (qfi.queueFamilyProperties.queueFlags & vk::QueueFlagBits::eCompute) + return idx; + idx++; + } + + throw std::runtime_error("No compute-capable queue family found"); +} + +bool vkhelper::checkHalfPrecisionSupport( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::PhysicalDevice& physdev +) { + vk::PhysicalDeviceVulkan12Features featuresVulkan12{}; + vk::PhysicalDeviceFeatures2 features{ + .pNext = &featuresVulkan12 + }; + physdev.getFeatures2(&features, dld); + return featuresVulkan12.shaderFloat16; +} + +std::pair vkhelper::createDevice( + vk::detail::DispatchLoaderDynamic& dld, + const vk::PhysicalDevice& physdev, + uint32_t qfi, + bool fp16 +) { + constexpr std::array EXTENSIONS{ + vk::KHRSynchronization2ExtensionName, + 
vk::KHRExternalMemoryFdExtensionName, + vk::KHRExternalSemaphoreFdExtensionName + }; + + vk::PhysicalDeviceSynchronization2FeaturesKHR sync2Info{ + .synchronization2 = VK_TRUE + }; + const vk::PhysicalDeviceVulkan12Features vk12Info{ + .pNext = &sync2Info, + .shaderFloat16 = fp16, + .timelineSemaphore = VK_TRUE + }; + const float queuePriority{1.0F}; // Highest priority + const vk::DeviceQueueCreateInfo queueInfo{ + .queueFamilyIndex = qfi, + .queueCount = 1, + .pQueuePriorities = &queuePriority + }; + const vk::DeviceCreateInfo deviceInfo{ + .pNext = &vk12Info, + .queueCreateInfoCount = 1, + .pQueueCreateInfos = &queueInfo, + .enabledExtensionCount = static_cast(EXTENSIONS.size()), + .ppEnabledExtensionNames = EXTENSIONS.data() + }; + auto device{physdev.createDeviceUnique(deviceInfo, nullptr, dld)}; + dld.init(*device); + + return{ + std::move(device), + device->getQueue(qfi, 0, dld) + }; +} + +/* Shader modules & pipelines */ + +vk::UniqueShaderModule vkhelper::createShaderModule( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const std::span& code +) { + const vk::ShaderModuleCreateInfo shaderInfo{ + .codeSize = code.size() * sizeof(uint32_t), + .pCode = code.data() + }; + return device.createShaderModuleUnique(shaderInfo, nullptr, dld); +} + +namespace { + /// Find the cache file path + std::filesystem::path findPipelineCache( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::PhysicalDevice& physdev, + std::string_view tag + ) { + // First find the base path + std::filesystem::path path{"/tmp/lsfg-vk"}; + + const char* xdgCacheHome{std::getenv("XDG_CACHE_HOME")}; + if (xdgCacheHome && *xdgCacheHome != '\0') + path = std::filesystem::path(xdgCacheHome) / "lsfg-vk"; + + const char* home{std::getenv("HOME")}; + if (home && *home != '\0') + path = std::filesystem::path(home) / ".cache" / "lsfg-vk"; + + // Ensure the directory exists + if (!std::filesystem::exists(path)) + std::filesystem::create_directories(path); + + // 
Calculate the physical device UUID + vk::PhysicalDeviceProperties2 info{}; + physdev.getProperties2(&info, dld); + + std::ostringstream ss; + ss << std::hex << std::setfill('0'); + for (uint32_t i = 0; i < 16; i++) { + ss << std::setw(2) << static_cast(info.properties.pipelineCacheUUID.at(i)); + if (i == 3 || i == 5 || i == 7 || i == 9) { + ss << "-"; + } + } + + // Return the full path + return path / ("cache_" + std::string(tag) + "_" + ss.str() + ".bin"); + } +} + +std::pair vkhelper::createPipelineCache( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::PhysicalDevice& physdev, + std::string_view tag +) { + const std::filesystem::path path{findPipelineCache(dld, physdev, tag)}; + const bool valid{std::filesystem::exists(path) && std::filesystem::file_size(path) > 32}; + + // Read cache data (if any) + std::vector cacheData{}; + if (std::filesystem::exists(path)) { + std::ifstream file(path, std::ios::binary | std::ios::ate); + if (!file.is_open()) + throw std::runtime_error("Unable to open pipeline cache file for reading"); + + const std::streamsize size{static_cast(file.tellg())}; + cacheData = std::vector(static_cast(size)); + + file.seekg(0, std::ios::beg); + if (!file.read(reinterpret_cast(cacheData.data()), size)) // NOLINT (unsafe cast) + throw std::runtime_error("Unable to read pipeline cache file"); + } + + // Build pipeline cache + const vk::PipelineCacheCreateInfo pipelineCacheInfo{ + .initialDataSize = cacheData.size(), + .pInitialData = cacheData.data() + }; + return { device.createPipelineCacheUnique(pipelineCacheInfo, nullptr, dld), valid }; +} + +void vkhelper::persistPipelineCache( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::PhysicalDevice& physdev, + const vk::PipelineCache& cache, + std::string_view tag +) { + const std::filesystem::path path{findPipelineCache(dld, physdev, tag)}; + + std::ofstream file(path, std::ios::binary | std::ios::trunc); + if 
(!file.is_open()) + throw std::runtime_error("Unable to open pipeline cache file for writing"); + + const std::vector cacheData{ + device.getPipelineCacheData(cache, dld) + }; + file.write( + reinterpret_cast(cacheData.data()), // NOLINT (unsafe cast) + static_cast(cacheData.size()) + ); + + file.flush(); + file.close(); +} + +std::pair vkhelper::createLayout( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const std::vector& bindings, + size_t pushConstantSize +) { + const vk::DescriptorSetLayoutCreateInfo layoutInfo{ + .flags = vk::DescriptorSetLayoutCreateFlagBits::eUpdateAfterBindPool, + .bindingCount = static_cast(bindings.size()), + .pBindings = bindings.data() + }; + auto descriptorSetLayout{device.createDescriptorSetLayoutUnique(layoutInfo, nullptr, dld)}; + + const vk::PushConstantRange pushConstantRange{ + .stageFlags = vk::ShaderStageFlagBits::eCompute, + .size = static_cast(pushConstantSize) + }; + const vk::PipelineLayoutCreateInfo pipelineLayoutInfo{ + .setLayoutCount = 1, + .pSetLayouts = &*descriptorSetLayout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &pushConstantRange + }; + auto pipelineLayout{device.createPipelineLayoutUnique(pipelineLayoutInfo, nullptr, dld)}; + + return { std::move(descriptorSetLayout), std::move(pipelineLayout) }; +} + +/* Resources */ + +vk::UniqueImage vkhelper::createImage( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + vk::Extent2D extent, + vk::Format format, + uint32_t layers, + vk::ImageUsageFlags usage +) { + const vk::ImageCreateInfo imageInfo{ + .imageType = vk::ImageType::e2D, + .format = format, + .extent = { + .width = extent.width, + .height = extent.height, + .depth = 1 + }, + .mipLevels = 1, + .arrayLayers = layers, + .samples = vk::SampleCountFlagBits::e1, + .usage = usage + }; + return device.createImageUnique(imageInfo, nullptr, dld); +} + +vk::UniqueSampler vkhelper::createSampler( + const vk::detail::DispatchLoaderDynamic& dld, + 
const vk::Device& device, + vk::SamplerAddressMode mode, + vk::CompareOp compare, + bool white +) { + const vk::SamplerCreateInfo samplerInfo{ + .magFilter = vk::Filter::eLinear, + .minFilter = vk::Filter::eLinear, + .mipmapMode = vk::SamplerMipmapMode::eLinear, + .addressModeU = mode, + .addressModeV = mode, + .addressModeW = mode, + .compareOp = compare, + .maxLod = vk::LodClampNone, + .borderColor = white ? + vk::BorderColor::eFloatOpaqueWhite : vk::BorderColor::eFloatTransparentBlack + }; + return device.createSamplerUnique(samplerInfo, nullptr, dld); +} + +std::pair vkhelper::createBuffer( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::PhysicalDevice& physdev, + vk::BufferUsageFlags usage, + const void* data, + size_t size +) { + // Create buffer + const vk::BufferCreateInfo bufferInfo{ + .size = size, + .usage = usage, + .sharingMode = vk::SharingMode::eExclusive + }; + auto buffer{device.createBufferUnique(bufferInfo, nullptr, dld)}; + + // Allocate memory + const auto requirements{device.getBufferMemoryRequirements(*buffer, dld)}; + + auto memory{vkhelper::allocateMemory( + dld, + device, + physdev, + requirements.size, + requirements.memoryTypeBits, + true + )}; + + // Bind memory + device.bindBufferMemory(*buffer, *memory, 0, dld); + + // Copy data + if (data) { + void* mapped{device.mapMemory(*memory, 0, size, {}, dld)}; + std::copy_n( + reinterpret_cast(data), // NOLINT (unsafe cast) + size, + reinterpret_cast(mapped) // NOLINT (unsafe cast) + ); + device.unmapMemory(*memory, dld); + } + + return { + std::move(buffer), + std::move(memory) + }; +} + +/* Memory allocations */ + +vk::UniqueDeviceMemory vkhelper::allocateMemory( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::PhysicalDevice& physdev, + vk::DeviceSize size, + std::bitset<32> types, + bool hostVisible +) { + // Find a suitable memory type index + const auto memProps{physdev.getMemoryProperties2(dld)}; + + 
std::optional selectedTypeIdx{}; + for (uint32_t i = 0; i < memProps.memoryProperties.memoryTypeCount; i++) { + if (!types.test(i)) + continue; + const auto& memType{memProps.memoryProperties.memoryTypes.at(i)}; + + const bool isHostVisible{ + memType.propertyFlags & vk::MemoryPropertyFlagBits::eHostVisible && + memType.propertyFlags & vk::MemoryPropertyFlagBits::eHostCoherent + }; + if (hostVisible && !isHostVisible) + continue; + + selectedTypeIdx = i; + + if (memType.propertyFlags & vk::MemoryPropertyFlagBits::eDeviceLocal) + break; + + // Fallback to host-visible memory if no device-local memory is available + } + + if (!selectedTypeIdx) + throw std::runtime_error("No suitable memory type found for allocation"); + + // Allocate memory + const vk::MemoryAllocateInfo allocInfo{ + .allocationSize = size, + .memoryTypeIndex = *selectedTypeIdx + }; + return device.allocateMemoryUnique(allocInfo, nullptr, dld); +} + +/* Descriptors */ + +std::pair vkhelper::createDescriptorSet( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::DescriptorSetLayout& layout, + uint32_t samplers, uint32_t buffers, + uint32_t sampledImages, uint32_t storageImages +) { + const std::array poolSizes{{ + { .type = vk::DescriptorType::eSampler, + .descriptorCount = samplers }, + { .type = vk::DescriptorType::eSampledImage, + .descriptorCount = sampledImages }, + { .type = vk::DescriptorType::eStorageImage, + .descriptorCount = storageImages }, + { .type = vk::DescriptorType::eUniformBuffer, + .descriptorCount = buffers } + }}; + auto pool{device.createDescriptorPoolUnique({ + .flags = vk::DescriptorPoolCreateFlagBits::eUpdateAfterBind, + .maxSets = 1, + .poolSizeCount = static_cast(poolSizes.size()), + .pPoolSizes = poolSizes.data() + }, nullptr, dld)}; + + auto set{device.allocateDescriptorSets({ + .descriptorPool = *pool, + .descriptorSetCount = 1, + .pSetLayouts = &layout + }, dld).at(0)}; + + return{ + std::move(pool), + set + }; +} + 
+vk::UniqueImageView vkhelper::createImageView( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::Image& image, + vk::Format format, + uint32_t layers +) { + const vk::ImageViewCreateInfo viewInfo{ + .image = image, + .viewType = layers == 1 ? vk::ImageViewType::e2D : vk::ImageViewType::e2DArray, + .format = format, + .subresourceRange = { + .aspectMask = vk::ImageAspectFlagBits::eColor, + .levelCount = 1, + .layerCount = layers + } + }; + return device.createImageViewUnique(viewInfo, nullptr, dld); +} + +/* Command buffers */ + +vk::UniqueCommandPool vkhelper::createCommandPool( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + uint32_t qfi +) { + const vk::CommandPoolCreateInfo cmdpoolInfo{ + .queueFamilyIndex = qfi + }; + return device.createCommandPoolUnique(cmdpoolInfo, nullptr, dld); +} + +vk::UniqueCommandBuffer vkhelper::createCommandBuffer( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::CommandPool& cmdpool +) { + const vk::CommandBufferAllocateInfo cmdbufInfo{ + .commandPool = cmdpool, + .commandBufferCount = 1 + }; + return { std::move(device.allocateCommandBuffersUnique(cmdbufInfo, dld).front()) }; +} + +vk::UniqueSemaphore vkhelper::createTimelineSemaphore( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + bool exportable +) { + const vk::ExportSemaphoreCreateInfo exportInfo{ + .handleTypes = vk::ExternalSemaphoreHandleTypeFlagBits::eOpaqueFd + }; + const vk::SemaphoreTypeCreateInfo typeInfo{ + .pNext = exportable ? 
&exportInfo : nullptr, + .semaphoreType = vk::SemaphoreType::eTimeline, + }; + const vk::SemaphoreCreateInfo createInfo{ + .pNext = &typeInfo, + }; + return device.createSemaphoreUnique(createInfo, nullptr, dld); +} + +vk::UniqueFence vkhelper::createFence( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device +) { + return device.createFenceUnique({}, nullptr, dld); +} + +/* External memory */ + +std::pair vkhelper::createExternalImage( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::PhysicalDevice& physdev, + vk::Extent2D extent, + vk::Format format, + uint32_t layers, + vk::ImageUsageFlags usage +) { + const vk::ExternalMemoryImageCreateInfo externalInfo{ + .handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eOpaqueFd + }; + const vk::ImageCreateInfo imageInfo{ + .pNext = &externalInfo, + .imageType = vk::ImageType::e2D, + .format = format, + .extent = { + .width = extent.width, + .height = extent.height, + .depth = 1 + }, + .mipLevels = 1, + .arrayLayers = layers, + .samples = vk::SampleCountFlagBits::e1, + .usage = usage + }; + auto image{device.createImageUnique(imageInfo, nullptr, dld)}; + + // Find a suitable memory type index + const auto memProps{physdev.getMemoryProperties2(dld)}; + const auto requirements{device.getImageMemoryRequirements(*image, dld)}; + + std::optional selectedTypeIdx{}; + for (uint32_t i = 0; i < memProps.memoryProperties.memoryTypeCount; i++) { + if (!std::bitset<32>(requirements.memoryTypeBits).test(i)) + continue; + const auto& memType{memProps.memoryProperties.memoryTypes.at(i)}; + + if (memType.propertyFlags & vk::MemoryPropertyFlagBits::eDeviceLocal) { + selectedTypeIdx = i; + break; + } + } + + if (!selectedTypeIdx) + throw std::runtime_error("No suitable memory type found for allocation"); + + // Allocate memory + const vk::MemoryDedicatedAllocateInfo dedicatedInfo{ + .image = *image, + }; + const vk::ExportMemoryAllocateInfo exportInfo{ + .pNext = &dedicatedInfo, 
+ .handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eOpaqueFd + }; + const vk::MemoryAllocateInfo allocInfo{ + .pNext = &exportInfo, + .allocationSize = requirements.size, + .memoryTypeIndex = *selectedTypeIdx + }; + auto memory{device.allocateMemoryUnique(allocInfo, nullptr, dld)}; + + // Bind memory + device.bindImageMemory(*image, *memory, 0, dld); + + return{ + std::move(image), + std::move(memory) + }; +} + +int vkhelper::exportMemoryFd( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::DeviceMemory& memory +) { + const vk::MemoryGetFdInfoKHR fdInfo{ + .memory = memory, + .handleType = vk::ExternalMemoryHandleTypeFlagBits::eOpaqueFd + }; + return device.getMemoryFdKHR(fdInfo, dld); +} + +int vkhelper::exportSemaphoreFd( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::Semaphore& semaphore +) { + const vk::SemaphoreGetFdInfoKHR fdInfo{ + .semaphore = semaphore, + .handleType = vk::ExternalSemaphoreHandleTypeFlagBits::eOpaqueFd + }; + return device.getSemaphoreFdKHR(fdInfo, dld); +} diff --git a/lsfg-vk-backend/src/utility/vkhelper.hpp b/lsfg-vk-backend/src/utility/vkhelper.hpp new file mode 100644 index 0000000..8b871d7 --- /dev/null +++ b/lsfg-vk-backend/src/utility/vkhelper.hpp @@ -0,0 +1,449 @@ +/* SPDX-License-Identifier: GPL-3.0-or-later */ + +#pragma once + +#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1 +#define VULKAN_HPP_TYPESAFE_CONVERSION 0 +#define VULKAN_HPP_NO_DEFAULT_DISPATCHER 1 +#define VULKAN_HPP_NO_CONSTRUCTORS 1 +#define VULKAN_HPP_NO_SETTERS 1 +#define VULKAN_HPP_NO_SPACESHIP_OPERATOR 1 +#define VULKAN_HPP_NO_TO_STRING 1 +#include // IWYU pragma: export + +// IWYU pragma: begin_exports +#include +#include +#include +#include +#include +#include +// IWYU pragma: end_exports + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace vkhelper { + + /* Device initialization */ + + /// + /// Create a Vulkan 1.2 instance for lsfg-vk + 
/// + /// @param dld Dynamic dispatch loader + /// @return RAII-wrapped Vulkan instance + /// @throws std::runtime_error on failure + /// + vk::UniqueInstance createInstance(vk::detail::DispatchLoaderDynamic& dld); + + /// + /// Find a physical device through a custom identifier + /// + /// The custom identifier may be one of: + /// - Device name (e.g. "NVIDIA GeForce RTX 5080") + /// - Vendor ID + Device ID in lowercase hexadecimal (e.g. "10de:2c02") + /// - PCI bus ID with padded zeroes (e.g. "0000:01:00.0") + /// + /// @param dld Dynamic dispatch loader + /// @param instance Vulkan instance + /// @param id Custom identifier + /// @return Selected physical device + /// @throws std::runtime_error if no suitable device found + /// + vk::PhysicalDevice findPhysicalDevice( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Instance& instance, + const std::string& id + ); + + /// + /// Find the first compute-capable queue family index + /// + /// @param dld Dynamic dispatch loader + /// @param physdev Physical device + /// @return Queue family index + /// @throws std::runtime_error if no compute-capable queue found + /// + uint32_t findComputeQueueFamilyIndex( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::PhysicalDevice& physdev + ); + + /// + /// Check a physical device for half-precision float support + /// + /// @param dld Dynamic dispatch loader + /// @param physdev Physical device + /// @return Whether half-precision float is supported + /// + bool checkHalfPrecisionSupport( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::PhysicalDevice& physdev + ); + + /// + /// Create a Vulkan device for lsfg-vk + /// + /// This device will have the core features timelineSemaphore and shaderFloat16 (if requested) + /// enabled, as well as the synchronization2, external memory & semaphore fd extensions. 
+ /// + /// @param dld Dynamic dispatch loader + /// @param physdev Physical device + /// @param qfi Queue family index of compute-capable queue + /// @param fp16 Whether to enable half-precision float support + /// @return RAII-wrapped Vulkan device & compute queue + /// @throws std::runtime_error on failure + /// + std::pair createDevice( + vk::detail::DispatchLoaderDynamic& dld, + const vk::PhysicalDevice& physdev, + uint32_t qfi, + bool fp16 + ); + + /* Shader modules & pipelines */ + + /// + /// Create a Vulkan shader module from SPIR-V bytecode + /// + /// @param dld Dynamic dispatch loader + /// @param device Vulkan device + /// @param code SPIR-V bytecode + /// @return RAII-wrapped Vulkan shader module + /// @throws std::runtime_error on failure + /// + vk::UniqueShaderModule createShaderModule( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const std::span& code + ); + + /// + /// Create and maintain the Vulkan pipeline cache for lsfg-vk + /// + /// @param dld Dynamic dispatch loader + /// @param device Vulkan device + /// @param physdev Physical device + /// @param tag Cache tag for different pipelines + /// @return RAII-wrapped Vulkan pipeline cache + /// @throws std::runtime_error on failure + /// + std::pair createPipelineCache( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::PhysicalDevice& physdev, + std::string_view tag + ); + + /// + /// Persist the Vulkan pipeline cache to disk + /// + /// @param dld Dynamic dispatch loader + /// @param device Vulkan device + /// @param physdev Physical device + /// @param cache Vulkan pipeline cache + /// @param tag Cache tag for different pipelines + /// @throws std::runtime_error on failure + /// + void persistPipelineCache( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::PhysicalDevice& physdev, + const vk::PipelineCache& cache, + std::string_view tag + ); + + /// + /// Create a Vulkan descriptor set 
layout + /// + /// @param dld Dynamic dispatch loader + /// @param device Vulkan device + /// @param bindings List of descriptor set layout bindings + /// @param pushConstantSize Size of push constant range + /// @return RAII-wrapped Vulkan descriptor set & pipeline layout + /// @throws std::runtime_error on failure + /// + std::pair createLayout( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const std::vector& bindings, + size_t pushConstantSize + ); + + /* Resources */ + + /// + /// Create a (unallocated) Vulkan image for lsfg-vk + /// + /// @param dld Dynamic dispatch loader + /// @param device Vulkan device + /// @param extent Image extent + /// @param format Image format + /// @param layers Amount of images + /// @param usage Image usage flags + /// @return RAII-wrapped Vulkan image + /// @throws std::runtime_error on failure + /// + vk::UniqueImage createImage( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + vk::Extent2D extent, + vk::Format format, + uint32_t layers, + vk::ImageUsageFlags usage + ); + + /// + /// Create a Vulkan sampler for lsfg-vk + /// + /// @param dld Dynamic dispatch loader + /// @param device Vulkan device + /// @param mode Address mode + /// @param compare Comparison mode + /// @param white Black/White border color + /// @return RAII-wrapped Vulkan sampler + /// @throws std::runtime_error on failure + /// + vk::UniqueSampler createSampler( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + vk::SamplerAddressMode mode, + vk::CompareOp compare, + bool white + ); + + // (forward decl) + std::pair createBuffer( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::PhysicalDevice& physdev, + vk::BufferUsageFlags usage, + const void* data, + size_t size + ); + + /// + /// Create a Vulkan buffer for lsfg-vk + /// + /// @param dld Dynamic dispatch loader + /// @param device Vulkan device + /// @param physdev Physical 
device + /// @param data Buffer contained data + /// @return RAII-wrapped Vulkan uniform buffer & device memory + /// @throws std::runtime_error on failure + /// + template + std::pair createBuffer( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::PhysicalDevice& physdev, + const T& data + ) { + return createBuffer( + dld, + device, + physdev, + vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eTransferDst, + static_cast(&data), + sizeof(T) + ); + } + + /* Memory allocations */ + + /// + /// Create a Vulkan memory allocation + /// + /// @param dld Dynamic dispatch loader + /// @param device Vulkan device + /// @param physdev Physical device + /// @param size Allocation size + /// @param types Valid memory type bits + /// @param hostVisible Require host visible memory + /// @return RAII-wrapped Vulkan device memory + /// @throws std::runtime_error on failure + /// + vk::UniqueDeviceMemory allocateMemory( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::PhysicalDevice& physdev, + vk::DeviceSize size, + std::bitset<32> types, + bool hostVisible = false + ); + + /// + /// Align a memory allocation + /// + /// @param size Memory size + /// @param align Alignment + /// @return Aligned memory size + /// + inline vk::DeviceSize align(vk::DeviceSize size, vk::DeviceSize align) noexcept { + return (size + align - 1) & ~(align - 1); + } + + /* Descriptors */ + + /// + /// Create a Vulkan descriptor set for lsfg-vk + /// + /// @param dld Dynamic dispatch loader + /// @param device Vulkan device + /// @param layout Descriptor set layout + /// @param samplers Amount of samplers + /// @param buffers Amount of buffers + /// @param sampledImages Amount of sampled images + /// @param storageImages Amount of storage images + /// @return Vulkan descriptor pool & set + /// @throws std::runtime_error on failure + /// + std::pair createDescriptorSet( + const vk::detail::DispatchLoaderDynamic& 
dld, + const vk::Device& device, + const vk::DescriptorSetLayout& layout, + uint32_t samplers, uint32_t buffers, + uint32_t sampledImages, uint32_t storageImages + ); + + /// + /// Create an image view + /// + /// @param dld Dynamic dispatch loader + /// @param device Vulkan device + /// @param image Vulkan image + /// @param format Image format + /// @param layers Amount of layers in image + /// @return RAII-wrapped Vulkan image view + /// @throws std::runtime_error on failure + /// + vk::UniqueImageView createImageView( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::Image& image, + vk::Format format, + uint32_t layers + ); + + /* Command buffers */ + + /// + /// Create a Vulkan command pool for lsfg-vk + /// + /// @param dld Dynamic dispatch loader + /// @param device Vulkan device + /// @param qfi Queue family index + /// @return RAII-wrapped Vulkan command pool + /// @throws std::runtime_error on failure + /// + vk::UniqueCommandPool createCommandPool( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + uint32_t qfi + ); + + /// + /// Create a Vulkan command buffer for lsfg-vk + /// + /// @param dld Dynamic dispatch loader + /// @param device Vulkan device + /// @param cmdpool Vulkan command pool + /// @return RAII-wrapped Vulkan command buffer + /// @throws std::runtime_error on failure + /// + vk::UniqueCommandBuffer createCommandBuffer( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::CommandPool& cmdpool + ); + + /// + /// Create a timeline semaphore + /// + /// @param dld Dynamic dispatch loader + /// @param device Vulkan device + /// @param exportable Whether the semaphore should be exportable as a fd + /// @return RAII-wrapped Vulkan semaphore + /// @throws std::runtime_error on failure + /// + vk::UniqueSemaphore createTimelineSemaphore( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + bool exportable = false + ); + + 
/// + /// Create a fence + /// + /// @param dld Dynamic dispatch loader + /// @param device Vulkan device + /// @return RAII-wrapped Vulkan fence + /// @throws std::runtime_error on failure + /// + vk::UniqueFence createFence( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device + ); + + /* External memory */ + + /// + /// Create a Vulkan image with a fd-exportable dedicated allocation + /// + /// @param dld Dynamic dispatch loader + /// @param device Vulkan device + /// @param physdev Physical device + /// @param extent Image extent + /// @param format Image format + /// @param layers Amount of images + /// @param usage Image usage flags + /// @return RAII-wrapped Vulkan image + /// @throws std::runtime_error on failure + /// + std::pair createExternalImage( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::PhysicalDevice& physdev, + vk::Extent2D extent, + vk::Format format, + uint32_t layers, + vk::ImageUsageFlags usage + ); + + /// + /// Export a Vulkan memory allocation as a fd + /// + /// @param dld Dynamic dispatch loader + /// @param device Vulkan device + /// @param memory Vulkan device memory + /// @return File descriptor to the allocation + /// @throws std::runtime_error on failure + /// + int exportMemoryFd( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::DeviceMemory& memory + ); + + /// + /// Export a Vulkan semaphore as a fd + /// + /// @param dld Dynamic dispatch loader + /// @param device Vulkan device + /// @param semaphore Vulkan semaphore + /// @return File descriptor to the semaphore + /// @throws std::runtime_error on failure + /// + int exportSemaphoreFd( + const vk::detail::DispatchLoaderDynamic& dld, + const vk::Device& device, + const vk::Semaphore& semaphore + ); + +} diff --git a/lsfg-vk-cli/CMakeLists.txt b/lsfg-vk-cli/CMakeLists.txt index ca53131..b20cf06 100644 --- a/lsfg-vk-cli/CMakeLists.txt +++ b/lsfg-vk-cli/CMakeLists.txt @@ -6,12 
+6,14 @@ set(CLI_SOURCES add_executable(lsfg-vk-cli ${CLI_SOURCES}) +target_include_directories(lsfg-vk-cli SYSTEM + PRIVATE thirdparty/include) + target_link_libraries(lsfg-vk-cli PUBLIC lsfg-vk-common PUBLIC lsfg-vk-backend) target_compile_options(lsfg-vk-cli PRIVATE - -Wno-unknown-warning-option -Wno-unsafe-buffer-usage) # CLI parsing install(TARGETS lsfg-vk-cli diff --git a/lsfg-vk-cli/src/main.cpp b/lsfg-vk-cli/src/main.cpp index 446ca8f..2726cff 100644 --- a/lsfg-vk-cli/src/main.cpp +++ b/lsfg-vk-cli/src/main.cpp @@ -18,7 +18,7 @@ using namespace lsfgvk::cli; namespace { - /// print usage information + /// Print usage information void usage(const std::string& prog) { std::cerr << R"(Validate, benchmark, and debug lsfg-vk. @@ -37,7 +37,7 @@ SUBCOMMAND OPTIONS: -c, --config Optional path to the configuration file benchmark & debug - -d, --dll Path to Lossless.dll + -d, --dll Path to lsfg-vk.dll -a, --allow-fp16 Allow FP16 acceleration -w, --width Width of the input frames -h, --height Height of the input frames @@ -53,7 +53,7 @@ SUBCOMMAND OPTIONS: Path to the debug frames)" << '\n'; } - /// parse the validate command options + /// Parse the validate command options [[noreturn]] void on_validate(int argc, char** argv) { validate::Options opts{}; @@ -83,7 +83,7 @@ SUBCOMMAND OPTIONS: std::exit(validate::run(opts)); } - /// parse the benchmark command options + /// Parse the benchmark command options [[noreturn]] void on_benchmark(int argc, char** argv) { benchmark::Options opts{}; @@ -145,7 +145,7 @@ SUBCOMMAND OPTIONS: std::exit(benchmark::run(opts)); } - /// parse the debug command options + /// Parse the debug command options [[noreturn]] void on_debug(int argc, char** argv) { debug::Options opts{}; diff --git a/lsfg-vk-cli/src/tools/benchmark.cpp b/lsfg-vk-cli/src/tools/benchmark.cpp index eb18aa3..ebd826d 100644 --- a/lsfg-vk-cli/src/tools/benchmark.cpp +++ b/lsfg-vk-cli/src/tools/benchmark.cpp @@ -1,12 +1,12 @@ /* SPDX-License-Identifier: GPL-3.0-or-later 
*/ #include "benchmark.hpp" -#include "lsfg-vk-backend/lsfgvk.hpp" #include "lsfg-vk-common/helpers/errors.hpp" #include "lsfg-vk-common/helpers/paths.hpp" #include "lsfg-vk-common/vulkan/image.hpp" #include "lsfg-vk-common/vulkan/timeline_semaphore.hpp" #include "lsfg-vk-common/vulkan/vulkan.hpp" +#include "lsfg-vk/lsfgvk.hpp" #include #include @@ -18,7 +18,6 @@ #include #include #include -#include #include #include @@ -29,7 +28,7 @@ using namespace lsfgvk::cli; using namespace lsfgvk::cli::benchmark; namespace { - // get current time in milliseconds + // Get current time in milliseconds uint64_t ms() { struct timespec ts{}; clock_gettime(CLOCK_MONOTONIC, &ts); @@ -41,29 +40,28 @@ namespace { int benchmark::run(const Options& opts) { try { - // parse options + // Parse options if (opts.flow < 0.25F || opts.flow > 1.0F) - throw ls::error("flow scale must be between 0.25 and 1.0"); + throw ls::error("Flow scale must be between 0.25 and 1.0"); if (opts.multiplier < 2) - throw ls::error("multiplier must be 2 or greater"); + throw ls::error("Multiplier must be 2 or greater"); if (opts.width <= 0 || opts.height <= 0) - throw ls::error("width and height must be positive integers"); + throw ls::error("Width and height must be positive integers"); if (opts.duration <= 0) - throw ls::error("duration must be a positive integer"); + throw ls::error("Duration must be a positive integer"); const VkExtent2D extent{ static_cast(opts.width), static_cast(opts.height) }; - // create instance + // Create instance + std::string gpu_name{}; + const vk::Vulkan vk{ "lsfg-vk-debug", vk::version{2, 0, 0}, - "lsfg-vk-debug-engine", vk::version{2, 0, 0}, - [opts](const vk::VulkanInstanceFuncs fi, + "lsfg-vk-debug", vk::version{2, 0, 0}, + [opts, gpu_name = &gpu_name](const vk::VulkanInstanceFuncs fi, const std::vector& devices) { - if (!opts.gpu.has_value()) - return devices.front(); - for (const VkPhysicalDevice& device : devices) { VkPhysicalDeviceProperties2 props{ .sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2 @@ -72,84 +70,81 @@ int benchmark::run(const Options& opts) { auto& properties = props.properties; std::array devname = std::to_array(properties.deviceName); - devname.at(255) = '\0'; // ensure null-termination + devname.at(255) = '\0'; // Ensure null-termination - if (std::string(devname.data()) == *opts.gpu) + if (!opts.gpu || std::string(devname.data()) == *opts.gpu) { + *gpu_name = std::string(devname.data()); return device; + } } - throw ls::error("failed to find specified GPU: " + *opts.gpu); + throw ls::error("Failed to find specified GPU: " + *opts.gpu); } }; - std::pair srcfds{}; - const vk::Image frame_0{vk, - extent, VK_FORMAT_R8G8B8A8_UNORM, - VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, - std::nullopt, &srcfds.first}; - const vk::Image frame_1{vk, - extent, VK_FORMAT_R8G8B8A8_UNORM, - VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, - std::nullopt, &srcfds.second}; - - std::vector destimgs{}; - std::vector destfds{}; - for (int i = 0; i < (opts.multiplier - 1); i++) { - int fd{}; - destimgs.emplace_back(vk, - extent, VK_FORMAT_R8G8B8A8_UNORM, - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, - std::nullopt, - &fd - ); - destfds.push_back(fd); - } - - int syncfd{}; - const vk::TimelineSemaphore sync{vk, 0, std::nullopt, &syncfd}; - - // initialize backend + // Initialize backend std::string dll{}; if (opts.dll.has_value()) dll = *opts.dll; else dll = ls::findShaderDll(); - lsfgvk::backend::Instance lsfgvk{ - [opts]( - const std::string& gpu_name, - std::pair, - const std::optional& - ) { - return opts.gpu.value_or(gpu_name) == gpu_name; - }, - dll, opts.allow_fp16 + const lsfgvk::Instance lsfgvk{ + gpu_name, + dll, + opts.allow_fp16 + }; + lsfgvk::Context lsfgvk_ctx{ + lsfgvk, + extent.width, extent.height, + opts.flow, opts.performance_mode }; - lsfgvk::backend::Context& lsfgvk_ctx = lsfgvk.openContext( - srcfds, destfds, - syncfd, extent.width, extent.height, - 
false, 1.0F / opts.flow, opts.performance_mode - ); - // run the benchmark + // Import resources + const auto fds{lsfgvk_ctx.exportFds()}; + + const vk::Image source{vk, + extent, + VK_FORMAT_R8G8B8A8_UNORM, + VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + fds.sourceFd, std::nullopt, 2 + }; + const vk::Image destination{vk, + extent, + VK_FORMAT_R8G8B8A8_UNORM, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + fds.destinationFd + }; + const vk::TimelineSemaphore sync{vk, + 0, + fds.syncFd + }; + + // Run the benchmark + const uint32_t total{static_cast(opts.multiplier) - 1U}; + size_t iterations{0}; size_t generated_frames{0}; - size_t total_frames{1}; + size_t total_frames{0}; + size_t idx{1}; uint64_t print_time = ms() + 1000ULL; const uint64_t end_time = ms() + static_cast(opts.duration) * 1000ULL; while (ms() < end_time) { - sync.signal(vk, total_frames++); - lsfgvk.scheduleFrames(lsfgvk_ctx); + lsfgvk_ctx.dispatch(total); - for (size_t i = 0; i < destimgs.size(); i++) { - auto success = sync.wait(vk, total_frames++); + for (size_t i = 0; i < total; i++) { + sync.signal(vk, idx++); + + auto success = sync.wait(vk, idx++); if (!success) - throw ls::error("failed to wait for frame"); + throw ls::error("Failed to wait for frame"); + total_frames++; generated_frames++; } + total_frames++; iterations++; if (ms() >= print_time) { @@ -158,25 +153,25 @@ int benchmark::run(const Options& opts) { } } - // output results - + // Output results std::cerr << (opts.duration < 40 ? 
"\r" : "\n"); - std::cerr << "benchmark results (ran for " << opts.duration << " seconds):\n"; - std::cerr << " iterations: " << iterations << "\n"; - std::cerr << " generated frames: " << generated_frames << "\n"; - std::cerr << " total frames: " << total_frames << "\n"; + std::cerr << "Benchmark results (ran for " << opts.duration << " seconds):\n"; + std::cerr << " Iterations: " << iterations << "\n"; + std::cerr << " Generated frames: " << generated_frames << "\n"; + std::cerr << " Total frames: " << total_frames << "\n"; const auto time = static_cast(opts.duration); const double fps_generated = static_cast(generated_frames) / time; const double fps_total = static_cast(total_frames) / time; std::cerr << std::setprecision(2) << std::fixed; - std::cerr << " fps (generated): " << fps_generated << "fps\n"; - std::cerr << " fps (total): " << fps_total << "fps\n"; + std::cerr << " FPS (generated): " << fps_generated << "fps\n"; + std::cerr << " FPS (total): " << fps_total << "fps\n"; + + // Wait for idle + lsfgvk_ctx.idle(); - // deinitialize lsfg-vk - lsfgvk.closeContext(lsfgvk_ctx); return EXIT_SUCCESS; } catch (const std::exception& e) { - std::cerr << "error: " << e.what() << "\n"; + std::cerr << "Error: " << e.what() << "\n"; return EXIT_FAILURE; } } diff --git a/lsfg-vk-cli/src/tools/benchmark.hpp b/lsfg-vk-cli/src/tools/benchmark.hpp index 18dbe1c..720da44 100644 --- a/lsfg-vk-cli/src/tools/benchmark.hpp +++ b/lsfg-vk-cli/src/tools/benchmark.hpp @@ -7,7 +7,9 @@ namespace lsfgvk::cli::benchmark { - /// options for the "benchmark" command + /// + /// Options for the "benchmark" command + /// struct Options { std::optional dll; bool allow_fp16{false}; @@ -22,8 +24,12 @@ namespace lsfgvk::cli::benchmark { int duration{10}; }; - /// run the "benchmark" command - /// @param opts the command options + /// + /// Run the "benchmark" command + /// + /// @param opts Command options + /// @return Exit code + /// int run(const Options& opts); } diff --git 
a/lsfg-vk-cli/src/tools/debug.cpp b/lsfg-vk-cli/src/tools/debug.cpp index 3b21449..8c850f5 100644 --- a/lsfg-vk-cli/src/tools/debug.cpp +++ b/lsfg-vk-cli/src/tools/debug.cpp @@ -1,7 +1,6 @@ /* SPDX-License-Identifier: GPL-3.0-or-later */ #include "debug.hpp" -#include "lsfg-vk-backend/lsfgvk.hpp" #include "lsfg-vk-common/helpers/errors.hpp" #include "lsfg-vk-common/helpers/paths.hpp" #include "lsfg-vk-common/vulkan/buffer.hpp" @@ -10,6 +9,9 @@ #include "lsfg-vk-common/vulkan/timeline_semaphore.hpp" #include "lsfg-vk-common/vulkan/vulkan.hpp" +#define LSFGVK_PRIV +#include "lsfg-vk/lsfgvk.hpp" + #include #include #include @@ -22,25 +24,30 @@ #include #include #include -#include +#include #include +#include +#include #include using namespace lsfgvk::cli; using namespace lsfgvk::cli::debug; namespace { - /// uploads an image from a dds file - void upload_image(const vk::Vulkan& vk, - const vk::Image& image, const std::string& path) { - // read image bytecode + /// Upload an image from a DDS file + void uploadDDS(const vk::Vulkan& vk, + const vk::Image& image, + const std::string& path, + uint32_t layer + ) { + // Read image data std::ifstream file(path.data(), std::ios::binary | std::ios::ate); if (!file.is_open()) throw ls::error("ifstream::ifstream() failed"); std::streamsize size = static_cast(file.tellg()); - size -= 124 + 4; // dds header and magic bytes + size -= 124 + 4; // DDS header and magic bytes std::vector code(static_cast(size)); file.seekg(124 + 4, std::ios::beg); @@ -49,13 +56,13 @@ namespace { file.close(); - // upload to image + // Upload to image const vk::Buffer stagingbuf{vk, code.data(), code.size(), VK_BUFFER_USAGE_TRANSFER_SRC_BIT}; const vk::CommandBuffer cmdbuf{vk}; cmdbuf.begin(vk); - cmdbuf.copyBufferToImage(vk, stagingbuf, image); + cmdbuf.copyBufferToImage(vk, stagingbuf, image, layer); cmdbuf.end(vk); const vk::TimelineSemaphore sema{vk, 0}; @@ -65,19 +72,19 @@ namespace { int debug::run(const Options& opts) { try { - // parse options + 
// Parse options if (opts.flow < 0.25F || opts.flow > 1.0F) - throw ls::error("flow scale must be between 0.25 and 1.0"); + throw ls::error("Flow scale must be between 0.25 and 1.0"); if (opts.multiplier < 2) - throw ls::error("multiplier must be 2 or greater"); + throw ls::error("Multiplier must be 2 or greater"); if (opts.width <= 0 || opts.height <= 0) - throw ls::error("width and height must be positive integers"); + throw ls::error("Width and height must be positive integers"); const VkExtent2D extent{ static_cast(opts.width), static_cast(opts.height) }; if (!std::filesystem::exists(opts.path)) - throw ls::error("debug path does not exist: " + opts.path.string()); + throw ls::error("Debug path does not exist: " + opts.path.string()); std::vector paths{}; for (const auto& entry : std::filesystem::directory_iterator(opts.path)) paths.push_back(entry.path()); @@ -87,23 +94,22 @@ int debug::run(const Options& opts) { auto norm_a = fa.find_first_of('.'); if (norm_a == std::string::npos) - throw ls::error("invalid debug file name: " + fa); + throw ls::error("Invalid debug file name: " + fa); auto norm_b = fb.find_first_of('.'); if (norm_b == std::string::npos) - throw ls::error("invalid debug file name: " + fb); + throw ls::error("Invalid debug file name: " + fb); return std::stoi(fa.substr(0, norm_a)) < std::stoi(fb.substr(0, norm_b)); }); - // create instance + // Create instance + std::string gpu_name{}; + const vk::Vulkan vk{ "lsfg-vk-debug", vk::version{2, 0, 0}, - "lsfg-vk-debug-engine", vk::version{2, 0, 0}, - [opts](const vk::VulkanInstanceFuncs fi, + "lsfg-vk-debug", vk::version{2, 0, 0}, + [opts, gpu_name = &gpu_name](const vk::VulkanInstanceFuncs fi, const std::vector& devices) { - if (!opts.gpu.has_value()) - return devices.front(); - for (const VkPhysicalDevice& device : devices) { VkPhysicalDeviceProperties2 props{ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2 @@ -112,87 +118,108 @@ int debug::run(const Options& opts) { auto& properties = 
props.properties; std::array devname = std::to_array(properties.deviceName); - devname.at(255) = '\0'; // ensure null-termination + devname.at(255) = '\0'; // Ensure null-termination - if (std::string(devname.data()) == *opts.gpu) + if (!opts.gpu || std::string(devname.data()) == *opts.gpu) { + *gpu_name = std::string(devname.data()); return device; + } } - throw ls::error("failed to find specified GPU: " + *opts.gpu); + throw ls::error("Failed to find specified GPU: " + *opts.gpu); } }; - std::pair srcfds{}; - const vk::Image frame_0{vk, - extent, VK_FORMAT_R8G8B8A8_UNORM, - VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, - std::nullopt, &srcfds.first}; - const vk::Image frame_1{vk, - extent, VK_FORMAT_R8G8B8A8_UNORM, - VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, - std::nullopt, &srcfds.second}; - - std::vector destimgs{}; - std::vector destfds{}; - for (int i = 0; i < (opts.multiplier - 1); i++) { - int fd{}; - destimgs.emplace_back(vk, - extent, VK_FORMAT_R8G8B8A8_UNORM, - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, - std::nullopt, - &fd - ); - destfds.push_back(fd); - } - - int syncfd{}; - const vk::TimelineSemaphore sync{vk, 0, std::nullopt, &syncfd}; - - // initialize backend + // Initialize backend std::string dll{}; if (opts.dll.has_value()) dll = *opts.dll; else dll = ls::findShaderDll(); - lsfgvk::backend::Instance lsfgvk{ - [opts]( - const std::string& gpu_name, - std::pair, - const std::optional& - ) { - return opts.gpu.value_or(gpu_name) == gpu_name; - }, - dll, opts.allow_fp16 - }; - lsfgvk::backend::Context& lsfgvk_ctx = lsfgvk.openContext( - srcfds, destfds, - syncfd, extent.width, extent.height, - false, 1.0F / opts.flow, opts.performance_mode - ); - // render destination images - size_t idx{1}; - for (size_t j = 0; j < paths.size(); j++) { - upload_image(vk, - j % 2 == 0 ? 
frame_0 : frame_1, - paths.at(j).string() + const lsfgvk::Instance lsfgvk{ + gpu_name, + dll, + opts.allow_fp16 + }; + lsfgvk::Context lsfgvk_ctx{ + lsfgvk, + extent.width, extent.height, + opts.flow, opts.performance_mode + }; + + // Import resources + const auto fds{lsfgvk_ctx.exportFds()}; + + const vk::Image source{vk, + extent, + VK_FORMAT_R8G8B8A8_UNORM, + VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + fds.sourceFd, std::nullopt, 2 + }; + const vk::Image destination{vk, + extent, + VK_FORMAT_R8G8B8A8_UNORM, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + fds.destinationFd + }; + const vk::TimelineSemaphore sync{vk, + 0, + fds.syncFd + }; + + // Try to open RenderDoc + RENDERDOC_API_1_6_0* rdoc_api{nullptr}; + RENDERDOC_DevicePointer rdoc_device{nullptr}; + if (void* module = dlopen("librenderdoc.so", RTLD_NOW | RTLD_NOLOAD)) { + void* func{dlsym(module, "RENDERDOC_GetAPI")}; + + auto* GetAPI{reinterpret_cast(func)}; // NOLINT (unsafe cast) + GetAPI( + eRENDERDOC_API_Version_1_0_0, + reinterpret_cast(&rdoc_api) // NOLINT (unsafe cast) ); - sync.signal(vk, idx++); - lsfgvk.scheduleFrames(lsfgvk_ctx); - - for (size_t i = 0; i < destimgs.size(); i++) { - auto success = sync.wait(vk, idx++); - if (!success) - throw ls::error("failed to wait for frame"); - } + rdoc_device = RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(lsfgvk._instance()); } - // deinitialize lsfg-vk - lsfgvk.closeContext(lsfgvk_ctx); + // Render destination images + const uint32_t total{static_cast(opts.multiplier) - 1U}; + + size_t idx{1}; + for (size_t j = 0; j < paths.size(); j++) { + uploadDDS(vk, source, paths.at(j).string(), j % 2); + + if (rdoc_api) { + rdoc_api->StartFrameCapture(rdoc_device, nullptr); + } + + std::thread signal_thread{[&sync, &vk, &idx, total] { + for (size_t i = 0; i < total; i++) { + sync.signal(vk, idx++); + + auto success = sync.wait(vk, idx++); + if (!success) + throw ls::error("Failed to wait for frame"); + } + }}; + + 
lsfgvk_ctx.dispatch(total); + + if (rdoc_api) { + lsfgvk_ctx.idle(); + rdoc_api->EndFrameCapture(rdoc_device, nullptr); + } + + signal_thread.join(); + } + + // Wait for idle + lsfgvk_ctx.idle(); + return EXIT_SUCCESS; } catch (const std::exception& e) { - std::cerr << "error: " << e.what() << "\n"; + std::cerr << "Error: " << e.what() << "\n"; return EXIT_FAILURE; } } diff --git a/lsfg-vk-cli/src/tools/debug.hpp b/lsfg-vk-cli/src/tools/debug.hpp index 631034a..9634e36 100644 --- a/lsfg-vk-cli/src/tools/debug.hpp +++ b/lsfg-vk-cli/src/tools/debug.hpp @@ -8,23 +8,29 @@ namespace lsfgvk::cli::debug { - /// options for the "debug" command + /// + /// Options for the "debug" command + /// struct Options { std::optional dll; - bool allow_fp16{true}; + bool allow_fp16{false}; int width{1920}; int height{1080}; - float flow{0.85F}; + float flow{1.0F}; int multiplier{2}; - bool performance_mode{true}; + bool performance_mode{false}; std::optional gpu; std::filesystem::path path; }; - /// run the "debug" command - /// @param opts the command options + /// + /// Run the "debug" command + /// + /// @param opts Command options + /// @return Exit code + /// int run(const Options& opts); } diff --git a/lsfg-vk-cli/src/tools/validate.cpp b/lsfg-vk-cli/src/tools/validate.cpp index 94542db..b78bcce 100644 --- a/lsfg-vk-cli/src/tools/validate.cpp +++ b/lsfg-vk-cli/src/tools/validate.cpp @@ -16,7 +16,7 @@ int validate::run(const Options& opts) { path = *opts.config; if (!std::filesystem::exists(path)) { - std::cerr << "Validation failed: configuration file does not exist\n"; + std::cerr << "Validation failed: Configuration file does not exist\n"; return 1; } diff --git a/lsfg-vk-cli/src/tools/validate.hpp b/lsfg-vk-cli/src/tools/validate.hpp index ee6d400..3936c7a 100644 --- a/lsfg-vk-cli/src/tools/validate.hpp +++ b/lsfg-vk-cli/src/tools/validate.hpp @@ -7,13 +7,19 @@ namespace lsfgvk::cli::validate { - /// options for the "validate" command + /// + /// Options for the "validate" 
command + /// struct Options { std::optional config; }; - /// run the "validate" command - /// @param opts the command options + /// + /// Run the "validate" command + /// + /// @param opts Command options + /// @return Exit code + /// int run(const Options& opts); } diff --git a/lsfg-vk-cli/thirdparty/include/renderdoc_app.h b/lsfg-vk-cli/thirdparty/include/renderdoc_app.h new file mode 100644 index 0000000..3cee3bd --- /dev/null +++ b/lsfg-vk-cli/thirdparty/include/renderdoc_app.h @@ -0,0 +1,875 @@ +/****************************************************************************** + * The MIT License (MIT) + * + * Copyright (c) 2015-2026 Baldur Karlsson + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ ******************************************************************************/ + +#pragma once + +////////////////////////////////////////////////////////////////////////////////////////////////// +// +// Documentation for the API is available at https://renderdoc.org/docs/in_application_api.html +// + +#if !defined(RENDERDOC_NO_STDINT) +#include <stdint.h> +#endif + +#if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER) +#define RENDERDOC_CC __cdecl +#elif defined(__linux__) || defined(__FreeBSD__) || defined(__sun__) || defined(__OpenBSD__) +#define RENDERDOC_CC +#elif defined(__APPLE__) +#define RENDERDOC_CC +#else +#error "Unknown platform" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +////////////////////////////////////////////////////////////////////////////////////////////////// +// Constants not used directly in below API + +// This is a GUID/magic value used for when applications pass a path where shader debug +// information can be found to match up with a stripped shader. +// the define can be used like so: const GUID RENDERDOC_ShaderDebugMagicValue = +// RENDERDOC_ShaderDebugMagicValue_value +#define RENDERDOC_ShaderDebugMagicValue_struct \ + { \ + 0xeab25520, 0x6670, 0x4865, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff \ + } + +// as an alternative when you want a byte array (assuming x86 endianness): +#define RENDERDOC_ShaderDebugMagicValue_bytearray \ + { \ + 0x20, 0x55, 0xb2, 0xea, 0x70, 0x66, 0x65, 0x48, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff \ + } + +// truncated version when only a uint64_t is available (e.g.
Vulkan tags): +#define RENDERDOC_ShaderDebugMagicValue_truncated 0x48656670eab25520ULL + +// this is a magic value for vulkan user tags to indicate which dispatchable API objects are which +// for object annotations +#define RENDERDOC_APIObjectAnnotationHelper 0xfbb3b337b664d0adULL + +////////////////////////////////////////////////////////////////////////////////////////////////// +// RenderDoc capture options +// + +typedef enum RENDERDOC_CaptureOption +{ + // Allow the application to enable vsync + // + // Default - enabled + // + // 1 - The application can enable or disable vsync at will + // 0 - vsync is force disabled + eRENDERDOC_Option_AllowVSync = 0, + + // Allow the application to enable fullscreen + // + // Default - enabled + // + // 1 - The application can enable or disable fullscreen at will + // 0 - fullscreen is force disabled + eRENDERDOC_Option_AllowFullscreen = 1, + + // Record API debugging events and messages + // + // Default - disabled + // + // 1 - Enable built-in API debugging features and records the results into + // the capture, which is matched up with events on replay + // 0 - no API debugging is forcibly enabled + eRENDERDOC_Option_APIValidation = 2, + eRENDERDOC_Option_DebugDeviceMode = 2, // deprecated name of this enum + + // Capture CPU callstacks for API events + // + // Default - disabled + // + // 1 - Enables capturing of callstacks + // 0 - no callstacks are captured + eRENDERDOC_Option_CaptureCallstacks = 3, + + // When capturing CPU callstacks, only capture them from actions. + // This option does nothing without the above option being enabled + // + // Default - disabled + // + // 1 - Only captures callstacks for actions. + // Ignored if CaptureCallstacks is disabled + // 0 - Callstacks, if enabled, are captured for every event. 
+ eRENDERDOC_Option_CaptureCallstacksOnlyDraws = 4, + eRENDERDOC_Option_CaptureCallstacksOnlyActions = 4, + + // Specify a delay in seconds to wait for a debugger to attach, after + // creating or injecting into a process, before continuing to allow it to run. + // + // 0 indicates no delay, and the process will run immediately after injection + // + // Default - 0 seconds + // + eRENDERDOC_Option_DelayForDebugger = 5, + + // Verify buffer access. This includes checking the memory returned by a Map() call to + // detect any out-of-bounds modification, as well as initialising buffers with undefined contents + // to a marker value to catch use of uninitialised memory. + // + // NOTE: This option is only valid for OpenGL and D3D11. Explicit APIs such as D3D12 and Vulkan do + // not do the same kind of interception & checking and undefined contents are really undefined. + // + // Default - disabled + // + // 1 - Verify buffer access + // 0 - No verification is performed, and overwriting bounds may cause crashes or corruption in + // RenderDoc. + eRENDERDOC_Option_VerifyBufferAccess = 6, + + // The old name for eRENDERDOC_Option_VerifyBufferAccess was eRENDERDOC_Option_VerifyMapWrites. + // This option now controls the filling of uninitialised buffers with 0xdddddddd which was + // previously always enabled + eRENDERDOC_Option_VerifyMapWrites = eRENDERDOC_Option_VerifyBufferAccess, + + // Hooks any system API calls that create child processes, and injects + // RenderDoc into them recursively with the same options. + // + // Default - disabled + // + // 1 - Hooks into spawned child processes + // 0 - Child processes are not hooked by RenderDoc + eRENDERDOC_Option_HookIntoChildren = 7, + + // By default RenderDoc only includes resources in the final capture necessary + // for that frame, this allows you to override that behaviour. 
+ // + // Default - disabled + // + // 1 - all live resources at the time of capture are included in the capture + // and available for inspection + // 0 - only the resources referenced by the captured frame are included + eRENDERDOC_Option_RefAllResources = 8, + + // **NOTE**: As of RenderDoc v1.1 this option has been deprecated. Setting or + // getting it will be ignored, to allow compatibility with older versions. + // In v1.1 the option acts as if it's always enabled. + // + // By default RenderDoc skips saving initial states for resources where the + // previous contents don't appear to be used, assuming that writes before + // reads indicate previous contents aren't used. + // + // Default - disabled + // + // 1 - initial contents at the start of each captured frame are saved, even if + // they are later overwritten or cleared before being used. + // 0 - unless a read is detected, initial contents will not be saved and will + // appear as black or empty data. + eRENDERDOC_Option_SaveAllInitials = 9, + + // In APIs that allow for the recording of command lists to be replayed later, + // RenderDoc may choose to not capture command lists before a frame capture is + // triggered, to reduce overheads. This means any command lists recorded once + // and replayed many times will not be available and may cause a failure to + // capture. + // + // NOTE: This is only true for APIs where multithreading is difficult or + // discouraged. Newer APIs like Vulkan and D3D12 will ignore this option + // and always capture all command lists since the API is heavily oriented + // around it and the overheads have been reduced by API design. + // + // 1 - All command lists are captured from the start of the application + // 0 - Command lists are only captured if their recording begins during + // the period when a frame capture is in progress. 
+ eRENDERDOC_Option_CaptureAllCmdLists = 10, + + // Mute API debugging output when the API validation mode option is enabled + // + // Default - enabled + // + // 1 - Mute any API debug messages from being displayed or passed through + // 0 - API debugging is displayed as normal + eRENDERDOC_Option_DebugOutputMute = 11, + + // Option to allow vendor extensions to be used even when they may be + // incompatible with RenderDoc and cause corrupted replays or crashes. + // + // Default - inactive + // + // No values are documented, this option should only be used when absolutely + // necessary as directed by a RenderDoc developer. + eRENDERDOC_Option_AllowUnsupportedVendorExtensions = 12, + + // Define a soft memory limit which some APIs may aim to keep overhead under where + // possible. Anything above this limit will where possible be saved directly to disk during + // capture. + // This will cause increased disk space use (which may cause a capture to fail if disk space is + // exhausted) as well as slower capture times. + // + // Not all memory allocations may be deferred like this so it is not a guarantee of a memory + // limit. + // + // Units are in MBs, suggested values would range from 200MB to 1000MB. + // + // Default - 0 Megabytes + eRENDERDOC_Option_SoftMemoryLimit = 13, +} RENDERDOC_CaptureOption; + +// Sets an option that controls how RenderDoc behaves on capture. 
+// +// Returns 1 if the option and value are valid +// Returns 0 if either is invalid and the option is unchanged +typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionU32)(RENDERDOC_CaptureOption opt, uint32_t val); +typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionF32)(RENDERDOC_CaptureOption opt, float val); + +// Gets the current value of an option as a uint32_t +// +// If the option is invalid, 0xffffffff is returned +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionU32)(RENDERDOC_CaptureOption opt); + +// Gets the current value of an option as a float +// +// If the option is invalid, -FLT_MAX is returned +typedef float(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionF32)(RENDERDOC_CaptureOption opt); + +typedef enum RENDERDOC_InputButton +{ + // '0' - '9' matches ASCII values + eRENDERDOC_Key_0 = 0x30, + eRENDERDOC_Key_1 = 0x31, + eRENDERDOC_Key_2 = 0x32, + eRENDERDOC_Key_3 = 0x33, + eRENDERDOC_Key_4 = 0x34, + eRENDERDOC_Key_5 = 0x35, + eRENDERDOC_Key_6 = 0x36, + eRENDERDOC_Key_7 = 0x37, + eRENDERDOC_Key_8 = 0x38, + eRENDERDOC_Key_9 = 0x39, + + // 'A' - 'Z' matches ASCII values + eRENDERDOC_Key_A = 0x41, + eRENDERDOC_Key_B = 0x42, + eRENDERDOC_Key_C = 0x43, + eRENDERDOC_Key_D = 0x44, + eRENDERDOC_Key_E = 0x45, + eRENDERDOC_Key_F = 0x46, + eRENDERDOC_Key_G = 0x47, + eRENDERDOC_Key_H = 0x48, + eRENDERDOC_Key_I = 0x49, + eRENDERDOC_Key_J = 0x4A, + eRENDERDOC_Key_K = 0x4B, + eRENDERDOC_Key_L = 0x4C, + eRENDERDOC_Key_M = 0x4D, + eRENDERDOC_Key_N = 0x4E, + eRENDERDOC_Key_O = 0x4F, + eRENDERDOC_Key_P = 0x50, + eRENDERDOC_Key_Q = 0x51, + eRENDERDOC_Key_R = 0x52, + eRENDERDOC_Key_S = 0x53, + eRENDERDOC_Key_T = 0x54, + eRENDERDOC_Key_U = 0x55, + eRENDERDOC_Key_V = 0x56, + eRENDERDOC_Key_W = 0x57, + eRENDERDOC_Key_X = 0x58, + eRENDERDOC_Key_Y = 0x59, + eRENDERDOC_Key_Z = 0x5A, + + // leave the rest of the ASCII range free + // in case we want to use it later + eRENDERDOC_Key_NonPrintable = 0x100, + + eRENDERDOC_Key_Divide, + eRENDERDOC_Key_Multiply, + 
eRENDERDOC_Key_Subtract, + eRENDERDOC_Key_Plus, + + eRENDERDOC_Key_F1, + eRENDERDOC_Key_F2, + eRENDERDOC_Key_F3, + eRENDERDOC_Key_F4, + eRENDERDOC_Key_F5, + eRENDERDOC_Key_F6, + eRENDERDOC_Key_F7, + eRENDERDOC_Key_F8, + eRENDERDOC_Key_F9, + eRENDERDOC_Key_F10, + eRENDERDOC_Key_F11, + eRENDERDOC_Key_F12, + + eRENDERDOC_Key_Home, + eRENDERDOC_Key_End, + eRENDERDOC_Key_Insert, + eRENDERDOC_Key_Delete, + eRENDERDOC_Key_PageUp, + eRENDERDOC_Key_PageDn, + + eRENDERDOC_Key_Backspace, + eRENDERDOC_Key_Tab, + eRENDERDOC_Key_PrtScrn, + eRENDERDOC_Key_Pause, + + eRENDERDOC_Key_Max, +} RENDERDOC_InputButton; + +// Sets which key or keys can be used to toggle focus between multiple windows +// +// If keys is NULL or num is 0, toggle keys will be disabled +typedef void(RENDERDOC_CC *pRENDERDOC_SetFocusToggleKeys)(RENDERDOC_InputButton *keys, int num); + +// Sets which key or keys can be used to capture the next frame +// +// If keys is NULL or num is 0, captures keys will be disabled +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureKeys)(RENDERDOC_InputButton *keys, int num); + +typedef enum RENDERDOC_OverlayBits +{ + // This single bit controls whether the overlay is enabled or disabled globally + eRENDERDOC_Overlay_Enabled = 0x1, + + // Show the average framerate over several seconds as well as min/max + eRENDERDOC_Overlay_FrameRate = 0x2, + + // Show the current frame number + eRENDERDOC_Overlay_FrameNumber = 0x4, + + // Show a list of recent captures, and how many captures have been made + eRENDERDOC_Overlay_CaptureList = 0x8, + + // Default values for the overlay mask + eRENDERDOC_Overlay_Default = (eRENDERDOC_Overlay_Enabled | eRENDERDOC_Overlay_FrameRate | + eRENDERDOC_Overlay_FrameNumber | eRENDERDOC_Overlay_CaptureList), + + // Enable all bits + eRENDERDOC_Overlay_All = 0x7ffffff, + + // Disable all bits + eRENDERDOC_Overlay_None = 0, +} RENDERDOC_OverlayBits; + +// returns the overlay bits that have been set +typedef uint32_t(RENDERDOC_CC 
*pRENDERDOC_GetOverlayBits)(void); +// sets the overlay bits with an and & or mask +typedef void(RENDERDOC_CC *pRENDERDOC_MaskOverlayBits)(uint32_t And, uint32_t Or); + +// this function will attempt to remove RenderDoc's hooks in the application. +// +// Note: that this can only work correctly if done immediately after +// the module is loaded, before any API work happens. RenderDoc will remove its +// injected hooks and shut down. Behaviour is undefined if this is called +// after any API functions have been called, and there is still no guarantee of +// success. +typedef void(RENDERDOC_CC *pRENDERDOC_RemoveHooks)(void); + +// DEPRECATED: compatibility for code compiled against pre-1.4.1 headers. +typedef pRENDERDOC_RemoveHooks pRENDERDOC_Shutdown; + +// This function will unload RenderDoc's crash handler. +// +// If you use your own crash handler and don't want RenderDoc's handler to +// intercede, you can call this function to unload it and any unhandled +// exceptions will pass to the next handler. +typedef void(RENDERDOC_CC *pRENDERDOC_UnloadCrashHandler)(void); + +// Sets the capture file path template +// +// pathtemplate is a UTF-8 string that gives a template for how captures will be named +// and where they will be saved. +// +// Any extension is stripped off the path, and captures are saved in the directory +// specified, and named with the filename and the frame number appended. If the +// directory does not exist it will be created, including any parent directories. 
+// +// If pathtemplate is NULL, the template will remain unchanged +// +// Example: +// +// SetCaptureFilePathTemplate("my_captures/example"); +// +// Capture #1 -> my_captures/example_frame123.rdc +// Capture #2 -> my_captures/example_frame456.rdc +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFilePathTemplate)(const char *pathtemplate); + +// returns the current capture path template, see SetCaptureFileTemplate above, as a UTF-8 string +typedef const char *(RENDERDOC_CC *pRENDERDOC_GetCaptureFilePathTemplate)(void); + +// DEPRECATED: compatibility for code compiled against pre-1.1.2 headers. +typedef pRENDERDOC_SetCaptureFilePathTemplate pRENDERDOC_SetLogFilePathTemplate; +typedef pRENDERDOC_GetCaptureFilePathTemplate pRENDERDOC_GetLogFilePathTemplate; + +// returns the number of captures that have been made +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetNumCaptures)(void); + +// This function returns the details of a capture, by index. New captures are added +// to the end of the list. +// +// filename will be filled with the absolute path to the capture file, as a UTF-8 string +// pathlength will be written with the length in bytes of the filename string +// timestamp will be written with the time of the capture, in seconds since the Unix epoch +// +// Any of the parameters can be NULL and they'll be skipped. +// +// The function will return 1 if the capture index is valid, or 0 if the index is invalid +// If the index is invalid, the values will be unchanged +// +// Note: when captures are deleted in the UI they will remain in this list, so the +// capture path may not exist anymore. +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCapture)(uint32_t idx, char *filename, + uint32_t *pathlength, uint64_t *timestamp); + +// Sets the comments associated with a capture file. These comments are displayed in the +// UI program when opening. +// +// filePath should be a path to the capture file to add comments to. 
If set to NULL or "" +// the most recent capture file created will be used instead. +// comments should be a NULL-terminated UTF-8 string to add as comments. +// +// Any existing comments will be overwritten. +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFileComments)(const char *filePath, + const char *comments); + +// returns 1 if the RenderDoc UI is connected to this application, 0 otherwise +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsTargetControlConnected)(void); + +// DEPRECATED: compatibility for code compiled against pre-1.1.1 headers. +// This was renamed to IsTargetControlConnected in API 1.1.1, the old typedef is kept here for +// backwards compatibility with old code, it is castable either way since it's ABI compatible +// as the same function pointer type. +typedef pRENDERDOC_IsTargetControlConnected pRENDERDOC_IsRemoteAccessConnected; + +// This function will launch the Replay UI associated with the RenderDoc library injected +// into the running application. +// +// if connectTargetControl is 1, the Replay UI will be launched with a command line parameter +// to connect to this application +// cmdline is the rest of the command line, as a UTF-8 string. E.g. a capture to open +// if cmdline is NULL, the command line will be empty. +// +// returns the PID of the replay UI if successful, 0 if not successful. +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_LaunchReplayUI)(uint32_t connectTargetControl, + const char *cmdline); + +// RenderDoc can return a higher version than requested if it's backwards compatible, +// this function returns the actual version returned. If a parameter is NULL, it will be +// ignored and the others will be filled out. +typedef void(RENDERDOC_CC *pRENDERDOC_GetAPIVersion)(int *major, int *minor, int *patch); + +// Requests that the replay UI show itself (if hidden or not the current top window).
This can be +// used in conjunction with IsTargetControlConnected and LaunchReplayUI to intelligently handle +// showing the UI after making a capture. +// +// This will return 1 if the request was successfully passed on, though it's not guaranteed that +// the UI will be on top in all cases depending on OS rules. It will return 0 if there is no current +// target control connection to make such a request, or if there was another error +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)(void); + +////////////////////////////////////////////////////////////////////////// +// Capturing functions +// + +// A device pointer is a pointer to the API's root handle. +// +// This would be an ID3D11Device, HGLRC/GLXContext, ID3D12Device, etc +typedef void *RENDERDOC_DevicePointer; + +// A window handle is the OS's native window handle +// +// This would be an HWND, GLXDrawable, etc +typedef void *RENDERDOC_WindowHandle; + +// A helper macro for Vulkan, where the device handle cannot be used directly. +// +// Passing the VkInstance to this macro will return the RENDERDOC_DevicePointer to use. +// +// Specifically, the value needed is the dispatch table pointer, which sits as the first +// pointer-sized object in the memory pointed to by the VkInstance. Thus we cast to a void** and +// indirect once. +#define RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(inst) (*((void **)(inst))) + +// This sets the RenderDoc in-app overlay in the API/window pair as 'active' and it will +// respond to keypresses. 
Neither parameter can be NULL +typedef void(RENDERDOC_CC *pRENDERDOC_SetActiveWindow)(RENDERDOC_DevicePointer device, + RENDERDOC_WindowHandle wndHandle); + +// capture the next frame on whichever window and API is currently considered active +typedef void(RENDERDOC_CC *pRENDERDOC_TriggerCapture)(void); + +// capture the next N frames on whichever window and API is currently considered active +typedef void(RENDERDOC_CC *pRENDERDOC_TriggerMultiFrameCapture)(uint32_t numFrames); + +// When choosing either a device pointer or a window handle to capture, you can pass NULL. +// Passing NULL specifies a 'wildcard' match against anything. This allows you to specify +// any API rendering to a specific window, or a specific API instance rendering to any window, +// or in the simplest case of one window and one API, you can just pass NULL for both. +// +// In either case, if there are two or more possible matching (device,window) pairs it +// is undefined which one will be captured. +// +// Note: for headless rendering you can pass NULL for the window handle and either specify +// a device pointer or leave it NULL as above. + +// Immediately starts capturing API calls on the specified device pointer and window handle. +// +// If there is no matching thing to capture (e.g. no supported API has been initialised), +// this will do nothing. +// +// The results are undefined (including crashes) if two captures are started overlapping, +// even on separate devices and/or windows. +typedef void(RENDERDOC_CC *pRENDERDOC_StartFrameCapture)(RENDERDOC_DevicePointer device, + RENDERDOC_WindowHandle wndHandle); + +// Returns whether or not a frame capture is currently ongoing anywhere. +// +// This will return 1 if a capture is ongoing, and 0 if there is no capture running +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsFrameCapturing)(void); + +// Ends capturing immediately. +// +// This will return 1 if the capture succeeded, and 0 if there was an error capturing.
+typedef uint32_t(RENDERDOC_CC *pRENDERDOC_EndFrameCapture)(RENDERDOC_DevicePointer device, + RENDERDOC_WindowHandle wndHandle); + +// Ends capturing immediately and discard any data stored without saving to disk. +// +// This will return 1 if the capture was discarded, and 0 if there was an error or no capture +// was in progress +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_DiscardFrameCapture)(RENDERDOC_DevicePointer device, + RENDERDOC_WindowHandle wndHandle); + +// Only valid to be called between a call to StartFrameCapture and EndFrameCapture. Gives a custom +// title to the capture produced which will be displayed in the UI. +// +// If multiple captures are ongoing, this title will be applied to the first capture to end after +// this call. The second capture to end will have no title, unless this function is called again. +// +// Calling this function has no effect if no capture is currently running, and if it is called +// multiple times only the last title will be used. +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureTitle)(const char *title); + +// Annotations API: +// +// These functions allow you to specify annotations either on a per-command level, or a per-object +// level. +// +// Basic types of annotations are supported, as well as vector versions and references to API objects. +// +// The annotations are stored as keys, with the key being a dot-separated path allowing arbitrary +// nesting and user organisation. The keys are sorted in human order so `foo.2.bar` will be displayed +// before `foo.10.bar` to allow creation of arrays if desired. +// +// Deleting an annotation can be done by assigning an empty value to it. 
+ +// the type of an annotation value, or Empty to delete an annotation +typedef enum RENDERDOC_AnnotationType +{ + eRENDERDOC_Empty, + eRENDERDOC_Bool, + eRENDERDOC_Int32, + eRENDERDOC_UInt32, + eRENDERDOC_Int64, + eRENDERDOC_UInt64, + eRENDERDOC_Float, + eRENDERDOC_Double, + eRENDERDOC_String, + eRENDERDOC_APIObject, + eRENDERDOC_AnnotationMax = 0x7FFFFFFF, +} RENDERDOC_AnnotationType; + +// a union with vector annotation value data +typedef union RENDERDOC_AnnotationVectorValue +{ + bool boolean[4]; + int32_t int32[4]; + int64_t int64[4]; + uint32_t uint32[4]; + uint64_t uint64[4]; + float float32[4]; + double float64[4]; +} RENDERDOC_AnnotationVectorValue; + +// a union with scalar annotation value data +typedef union RENDERDOC_AnnotationValue +{ + bool boolean; + int32_t int32; + int64_t int64; + uint32_t uint32; + uint64_t uint64; + float float32; + double float64; + + RENDERDOC_AnnotationVectorValue vector; + + const char *string; + void *apiObject; +} RENDERDOC_AnnotationValue; + +// a struct for specifying a GL object, as we don't have pointers we can use so instead we specify a +// pointer to this struct giving both the type and the name +typedef struct RENDERDOC_GLResourceReference +{ + // this is the same GLenum identifier as passed to glObjectLabel + uint32_t identifier; + uint32_t name; +} GLResourceReference; + +// simple C++ helpers to avoid the need for temporary objects for value passing and GL object specification +#ifdef __cplusplus +struct RDGLObjectHelper +{ + RENDERDOC_GLResourceReference gl; + + RDGLObjectHelper(uint32_t identifier, uint32_t name) + { + gl.identifier = identifier; + gl.name = name; + } + + operator RENDERDOC_GLResourceReference *() { return &gl; } +}; + +struct RDAnnotationHelper +{ + RENDERDOC_AnnotationValue val; + + RDAnnotationHelper(bool b) { val.boolean = b; } + RDAnnotationHelper(int32_t i) { val.int32 = i; } + RDAnnotationHelper(int64_t i) { val.int64 = i; } + RDAnnotationHelper(uint32_t i) { val.uint32 = i; } +
RDAnnotationHelper(uint64_t i) { val.uint64 = i; } + RDAnnotationHelper(float f) { val.float32 = f; } + RDAnnotationHelper(double d) { val.float64 = d; } + RDAnnotationHelper(const char *s) { val.string = s; } + + operator RENDERDOC_AnnotationValue *() { return &val; } +}; +#endif + +// The device is specified in the same way as other API calls that take a RENDERDOC_DevicePointer +// to specify the device. +// +// The object or queue/commandbuffer will depend on the graphics API in question. +// +// Return value: +// 0 - The annotation was applied successfully. +// 1 - The device is unknown/invalid +// 2 - The device is valid but the annotation is not supported for API-specific reasons, such as an +// unrecognised or invalid object or queue/commandbuffer +// 3 - The call is ill-formed or invalid e.g. empty is specified with a value pointer, or non-empty +// is specified with a NULL value pointer +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_SetObjectAnnotation)(RENDERDOC_DevicePointer device, + void *object, const char *key, + RENDERDOC_AnnotationType valueType, + uint32_t valueVectorWidth, + const RENDERDOC_AnnotationValue *value); + +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_SetCommandAnnotation)( + RENDERDOC_DevicePointer device, void *queueOrCommandBuffer, const char *key, + RENDERDOC_AnnotationType valueType, uint32_t valueVectorWidth, + const RENDERDOC_AnnotationValue *value); + +////////////////////////////////////////////////////////////////////////////////////////////////// +// RenderDoc API versions +// + +// RenderDoc uses semantic versioning (http://semver.org/). +// +// MAJOR version is incremented when incompatible API changes happen. +// MINOR version is incremented when functionality is added in a backwards-compatible manner. +// PATCH version is incremented when backwards-compatible bug fixes happen. +// +// Note that this means the API returned can be higher than the one you might have requested. +// e.g. 
if you are running against a newer RenderDoc that supports 1.0.1, it will be returned +// instead of 1.0.0. You can check this with the GetAPIVersion entry point +typedef enum RENDERDOC_Version +{ + eRENDERDOC_API_Version_1_0_0 = 10000, // RENDERDOC_API_1_0_0 = 1 00 00 + eRENDERDOC_API_Version_1_0_1 = 10001, // RENDERDOC_API_1_0_1 = 1 00 01 + eRENDERDOC_API_Version_1_0_2 = 10002, // RENDERDOC_API_1_0_2 = 1 00 02 + eRENDERDOC_API_Version_1_1_0 = 10100, // RENDERDOC_API_1_1_0 = 1 01 00 + eRENDERDOC_API_Version_1_1_1 = 10101, // RENDERDOC_API_1_1_1 = 1 01 01 + eRENDERDOC_API_Version_1_1_2 = 10102, // RENDERDOC_API_1_1_2 = 1 01 02 + eRENDERDOC_API_Version_1_2_0 = 10200, // RENDERDOC_API_1_2_0 = 1 02 00 + eRENDERDOC_API_Version_1_3_0 = 10300, // RENDERDOC_API_1_3_0 = 1 03 00 + eRENDERDOC_API_Version_1_4_0 = 10400, // RENDERDOC_API_1_4_0 = 1 04 00 + eRENDERDOC_API_Version_1_4_1 = 10401, // RENDERDOC_API_1_4_1 = 1 04 01 + eRENDERDOC_API_Version_1_4_2 = 10402, // RENDERDOC_API_1_4_2 = 1 04 02 + eRENDERDOC_API_Version_1_5_0 = 10500, // RENDERDOC_API_1_5_0 = 1 05 00 + eRENDERDOC_API_Version_1_6_0 = 10600, // RENDERDOC_API_1_6_0 = 1 06 00 + eRENDERDOC_API_Version_1_7_0 = 10700, // RENDERDOC_API_1_7_0 = 1 07 00 +} RENDERDOC_Version; + +// API version changelog: +// +// 1.0.0 - initial release +// 1.0.1 - Bugfix: IsFrameCapturing() was returning false for captures that were triggered +// by keypress or TriggerCapture, instead of Start/EndFrameCapture. +// 1.0.2 - Refactor: Renamed eRENDERDOC_Option_DebugDeviceMode to eRENDERDOC_Option_APIValidation +// 1.1.0 - Add feature: TriggerMultiFrameCapture(). 
Backwards compatible with 1.0.x since the new +// function pointer is added to the end of the struct, the original layout is identical +// 1.1.1 - Refactor: Renamed remote access to target control (to better disambiguate from remote +// replay/remote server concept in replay UI) +// 1.1.2 - Refactor: Renamed "log file" in function names to just capture, to clarify that these +// are captures and not debug logging files. This is the first API version in the v1.0 +// branch. +// 1.2.0 - Added feature: SetCaptureFileComments() to add comments to a capture file that will be +// displayed in the UI program on load. +// 1.3.0 - Added feature: New capture option eRENDERDOC_Option_AllowUnsupportedVendorExtensions +// which allows users to opt-in to allowing unsupported vendor extensions to function. +// Should be used at the user's own risk. +// Refactor: Renamed eRENDERDOC_Option_VerifyMapWrites to +// eRENDERDOC_Option_VerifyBufferAccess, which now also controls initialisation to +// 0xdddddddd of uninitialised buffer contents. +// 1.4.0 - Added feature: DiscardFrameCapture() to discard a frame capture in progress and stop +// capturing without saving anything to disk. +// 1.4.1 - Refactor: Renamed Shutdown to RemoveHooks to better clarify what is happening +// 1.4.2 - Refactor: Renamed 'draws' to 'actions' in callstack capture option. 
+// 1.5.0 - Added feature: ShowReplayUI() to request that the replay UI show itself if connected +// 1.6.0 - Added feature: SetCaptureTitle() which can be used to set a title for a +// capture made with StartFrameCapture() or EndFrameCapture() +// 1.7.0 - Added feature: SetObjectAnnotation() / SetCommandAnnotation() for adding rich +// annotations to objects and command streams + +typedef struct RENDERDOC_API_1_7_0 +{ + pRENDERDOC_GetAPIVersion GetAPIVersion; + + pRENDERDOC_SetCaptureOptionU32 SetCaptureOptionU32; + pRENDERDOC_SetCaptureOptionF32 SetCaptureOptionF32; + + pRENDERDOC_GetCaptureOptionU32 GetCaptureOptionU32; + pRENDERDOC_GetCaptureOptionF32 GetCaptureOptionF32; + + pRENDERDOC_SetFocusToggleKeys SetFocusToggleKeys; + pRENDERDOC_SetCaptureKeys SetCaptureKeys; + + pRENDERDOC_GetOverlayBits GetOverlayBits; + pRENDERDOC_MaskOverlayBits MaskOverlayBits; + + // Shutdown was renamed to RemoveHooks in 1.4.1. + // These unions allow old code to continue compiling without changes + union + { + pRENDERDOC_Shutdown Shutdown; + pRENDERDOC_RemoveHooks RemoveHooks; + }; + pRENDERDOC_UnloadCrashHandler UnloadCrashHandler; + + // Get/SetLogFilePathTemplate was renamed to Get/SetCaptureFilePathTemplate in 1.1.2. + // These unions allow old code to continue compiling without changes + union + { + // deprecated name + pRENDERDOC_SetLogFilePathTemplate SetLogFilePathTemplate; + // current name + pRENDERDOC_SetCaptureFilePathTemplate SetCaptureFilePathTemplate; + }; + union + { + // deprecated name + pRENDERDOC_GetLogFilePathTemplate GetLogFilePathTemplate; + // current name + pRENDERDOC_GetCaptureFilePathTemplate GetCaptureFilePathTemplate; + }; + + pRENDERDOC_GetNumCaptures GetNumCaptures; + pRENDERDOC_GetCapture GetCapture; + + pRENDERDOC_TriggerCapture TriggerCapture; + + // IsRemoteAccessConnected was renamed to IsTargetControlConnected in 1.1.1. 
+ // This union allows old code to continue compiling without changes + union + { + // deprecated name + pRENDERDOC_IsRemoteAccessConnected IsRemoteAccessConnected; + // current name + pRENDERDOC_IsTargetControlConnected IsTargetControlConnected; + }; + pRENDERDOC_LaunchReplayUI LaunchReplayUI; + + pRENDERDOC_SetActiveWindow SetActiveWindow; + + pRENDERDOC_StartFrameCapture StartFrameCapture; + pRENDERDOC_IsFrameCapturing IsFrameCapturing; + pRENDERDOC_EndFrameCapture EndFrameCapture; + + // new function in 1.1.0 + pRENDERDOC_TriggerMultiFrameCapture TriggerMultiFrameCapture; + + // new function in 1.2.0 + pRENDERDOC_SetCaptureFileComments SetCaptureFileComments; + + // new function in 1.4.0 + pRENDERDOC_DiscardFrameCapture DiscardFrameCapture; + + // new function in 1.5.0 + pRENDERDOC_ShowReplayUI ShowReplayUI; + + // new function in 1.6.0 + pRENDERDOC_SetCaptureTitle SetCaptureTitle; + + // new functions in 1.7.0 + pRENDERDOC_SetObjectAnnotation SetObjectAnnotation; + pRENDERDOC_SetCommandAnnotation SetCommandAnnotation; +} RENDERDOC_API_1_7_0; + +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_0_0; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_0_1; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_0_2; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_1_0; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_1_1; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_1_2; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_2_0; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_3_0; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_4_0; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_4_1; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_4_2; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_5_0; +typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_6_0; + +////////////////////////////////////////////////////////////////////////////////////////////////// +// RenderDoc API entry point +// +// This entry point can be obtained via GetProcAddress/dlsym if RenderDoc is available. 
+// +// The name is the same as the typedef - "RENDERDOC_GetAPI" +// +// This function is not thread safe, and should not be called on multiple threads at once. +// Ideally, call this once as early as possible in your application's startup, before doing +// any API work, since some configuration functionality etc has to be done also before +// initialising any APIs. +// +// Parameters: +// version is a single value from the RENDERDOC_Version above. +// +// outAPIPointers will be filled out with a pointer to the corresponding struct of function +// pointers. +// +// Returns: +// 1 - if the outAPIPointers has been filled with a pointer to the API struct requested +// 0 - if the requested version is not supported or the arguments are invalid. +// +typedef int(RENDERDOC_CC *pRENDERDOC_GetAPI)(RENDERDOC_Version version, void **outAPIPointers); + +#ifdef __cplusplus +} // extern "C" +#endif diff --git a/lsfg-vk-common/CMakeLists.txt b/lsfg-vk-common/CMakeLists.txt index b03c78d..5833796 100644 --- a/lsfg-vk-common/CMakeLists.txt +++ b/lsfg-vk-common/CMakeLists.txt @@ -23,7 +23,12 @@ target_include_directories(lsfg-vk-common target_include_directories(lsfg-vk-common SYSTEM PRIVATE thirdparty/include) -if(LSFGVK_INSTALL_DEVELOP) +target_compile_options(lsfg-vk-common PUBLIC + -Wno-cast-function-type-strict # Vulkan function pointers + -Wno-shadow # Shadowing variables used to be common practice +) + +if(LSFGVK_INSTALL_LIBRARIES) install(TARGETS lsfg-vk-common ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}") install(DIRECTORY "include/lsfg-vk-common/" diff --git a/lsfg-vk-common/include/lsfg-vk-common/helpers/paths.hpp b/lsfg-vk-common/include/lsfg-vk-common/helpers/paths.hpp index d51225e..4731d33 100644 --- a/lsfg-vk-common/include/lsfg-vk-common/helpers/paths.hpp +++ b/lsfg-vk-common/include/lsfg-vk-common/helpers/paths.hpp @@ -6,7 +6,7 @@ namespace ls { - /// find the location of the Lossless.dll + /// find the location of the lsfg-vk.dll /// @returns the path to the DLL 
/// @throws ls::error if the DLL could not be found std::filesystem::path findShaderDll(); diff --git a/lsfg-vk-common/include/lsfg-vk-common/vulkan/buffer.hpp b/lsfg-vk-common/include/lsfg-vk-common/vulkan/buffer.hpp index 7fabeb2..ed762db 100644 --- a/lsfg-vk-common/include/lsfg-vk-common/vulkan/buffer.hpp +++ b/lsfg-vk-common/include/lsfg-vk-common/vulkan/buffer.hpp @@ -21,7 +21,12 @@ namespace vk { template Buffer(const vk::Vulkan& vk, const T& data, VkBufferUsageFlags usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT) - : Buffer(vk, reinterpret_cast(&data), sizeof(T), usage) {} + : Buffer( + vk, + reinterpret_cast(&data), // NOLINT (unsafe cast) + sizeof(T), + usage + ) {} /// create a buffer /// @param vk the vulkan instance diff --git a/lsfg-vk-common/include/lsfg-vk-common/vulkan/command_buffer.hpp b/lsfg-vk-common/include/lsfg-vk-common/vulkan/command_buffer.hpp index 0129238..cbec421 100644 --- a/lsfg-vk-common/include/lsfg-vk-common/vulkan/command_buffer.hpp +++ b/lsfg-vk-common/include/lsfg-vk-common/vulkan/command_buffer.hpp @@ -42,7 +42,8 @@ namespace vk { void blitImage(const vk::Vulkan& vk, const std::vector& preBarriers, std::pair images, VkExtent2D extent, - const std::vector& postBarriers) const; + const std::vector& postBarriers, + uint32_t srcLayer = 0, uint32_t dstLayer = 0) const; /// insert a bunch of barriers /// @param vk the vulkan instance @@ -68,7 +69,8 @@ namespace vk { /// @param buffer the source buffer /// @param image the destination image void copyBufferToImage(const vk::Vulkan& vk, - const vk::Buffer& buffer, const vk::Image& image) const; + const vk::Buffer& buffer, const vk::Image& image, + uint32_t dstLayer = 0) const; /// end recording commands /// @param vk the vulkan instance diff --git a/lsfg-vk-common/include/lsfg-vk-common/vulkan/image.hpp b/lsfg-vk-common/include/lsfg-vk-common/vulkan/image.hpp index db38f01..9613915 100644 --- a/lsfg-vk-common/include/lsfg-vk-common/vulkan/image.hpp +++ 
b/lsfg-vk-common/include/lsfg-vk-common/vulkan/image.hpp @@ -5,6 +5,7 @@ #include "../helpers/pointers.hpp" #include "vulkan.hpp" +#include #include #include @@ -26,7 +27,9 @@ namespace vk { VkFormat format = VK_FORMAT_R8G8B8A8_UNORM, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, std::optional importFd = std::nullopt, - std::optional exportFd = std::nullopt); + std::optional exportFd = std::nullopt, + uint32_t arrayLayers = 1 + ); /// get the image handle /// @return the image handle diff --git a/lsfg-vk-common/src/configuration/config.cpp b/lsfg-vk-common/src/configuration/config.cpp index cee5861..1163e1b 100644 --- a/lsfg-vk-common/src/configuration/config.cpp +++ b/lsfg-vk-common/src/configuration/config.cpp @@ -30,7 +30,7 @@ void ConfigFile::createDefaultConfigFile(const std::filesystem::path& path) { ofs << R"(version = 2 [global] -# dll = '/media/games/Lossless Scaling/Lossless.dll' # if you don't have LS in the default location +# dll = '/media/games/Lossless Scaling/lsfg-vk.dll' # if you don't have LS in the default location allow_fp16 = true # this will improve give a MASSIVE performance boost on AMD, but be super slow on older (!) 
NVIDIA GPUs [[profile]] diff --git a/lsfg-vk-common/src/helpers/paths.cpp b/lsfg-vk-common/src/helpers/paths.cpp index c9ac7ec..bcc92ec 100644 --- a/lsfg-vk-common/src/helpers/paths.cpp +++ b/lsfg-vk-common/src/helpers/paths.cpp @@ -22,7 +22,7 @@ std::filesystem::path ls::findShaderDll() { auto base = std::filesystem::path(xdgPath); for (const auto& frag : FRAGMENTS) { - auto full = base / frag / "Lossless Scaling" / "Lossless.dll"; + auto full = base / frag / "Lossless Scaling" / "lsfg-vk.dll"; if (std::filesystem::exists(full)) return full; } @@ -34,16 +34,16 @@ std::filesystem::path ls::findShaderDll() { auto base = std::filesystem::path(homePath); for (const auto& frag : FRAGMENTS) { - auto full = base / frag / "Lossless Scaling" / "Lossless.dll"; + auto full = base / frag / "Lossless Scaling" / "lsfg-vk.dll"; if (std::filesystem::exists(full)) return full; } } // fallback to same directory - auto local = std::filesystem::current_path() / "Lossless.dll"; + auto local = std::filesystem::current_path() / "lsfg-vk.dll"; if (std::filesystem::exists(local)) return local; - throw ls::error("unable to locate Lossless.dll, please set the path in the configuration"); + throw ls::error("unable to locate lsfg-vk.dll, please set the path in the configuration"); } diff --git a/lsfg-vk-common/src/vulkan/command_buffer.cpp b/lsfg-vk-common/src/vulkan/command_buffer.cpp index cac4732..aaf9fab 100644 --- a/lsfg-vk-common/src/vulkan/command_buffer.cpp +++ b/lsfg-vk-common/src/vulkan/command_buffer.cpp @@ -105,7 +105,8 @@ void CommandBuffer::dispatch(const vk::Vulkan& vk, void CommandBuffer::blitImage(const vk::Vulkan& vk, const std::vector& preBarriers, std::pair images, VkExtent2D extent, - const std::vector& postBarriers) const { + const std::vector& postBarriers, + uint32_t srcLayer, uint32_t dstLayer) const { vk.df().CmdPipelineBarrier(*this->commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, @@ -117,7 +118,8 @@ void 
CommandBuffer::blitImage(const vk::Vulkan& vk, const VkImageBlit region{ .srcSubresource = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, - .layerCount = 1 + .baseArrayLayer = srcLayer, + .layerCount = 1, }, .srcOffsets = { { 0, 0, 0 }, @@ -126,6 +128,7 @@ void CommandBuffer::blitImage(const vk::Vulkan& vk, }, .dstSubresource = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseArrayLayer = dstLayer, .layerCount = 1 }, .dstOffsets = { @@ -151,7 +154,8 @@ void CommandBuffer::blitImage(const vk::Vulkan& vk, } void CommandBuffer::copyBufferToImage(const vk::Vulkan& vk, - const vk::Buffer& buffer, const vk::Image& image) const { + const vk::Buffer& buffer, const vk::Image& image, + uint32_t dstLayer) const { const VkImageMemoryBarrier barrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, .srcAccessMask = VK_ACCESS_NONE, @@ -179,6 +183,7 @@ void CommandBuffer::copyBufferToImage(const vk::Vulkan& vk, .bufferImageHeight = 0, .imageSubresource = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseArrayLayer = dstLayer, .layerCount = 1 }, .imageExtent = { diff --git a/lsfg-vk-common/src/vulkan/image.cpp b/lsfg-vk-common/src/vulkan/image.cpp index 6eb45c4..db1ecbd 100644 --- a/lsfg-vk-common/src/vulkan/image.cpp +++ b/lsfg-vk-common/src/vulkan/image.cpp @@ -6,6 +6,7 @@ #include "lsfg-vk-common/vulkan/vulkan.hpp" #include +#include #include #include @@ -16,7 +17,7 @@ namespace { /// create a image ls::owned_ptr createImage(const vk::Vulkan& vk, VkExtent2D extent, VkFormat format, VkImageUsageFlags usage, - bool external) { + bool external, uint32_t arrayLayers) { VkImage handle{}; const VkExternalMemoryImageCreateInfo externalInfo{ @@ -34,7 +35,7 @@ namespace { .depth = 1 }, .mipLevels = 1, - .arrayLayers = 1, + .arrayLayers = arrayLayers, .samples = VK_SAMPLE_COUNT_1_BIT, .usage = usage, .sharingMode = VK_SHARING_MODE_EXCLUSIVE @@ -121,20 +122,20 @@ namespace { } /// create an image view ls::owned_ptr createImageView(const vk::Vulkan& vk, - VkImage image, VkFormat format) { + 
VkImage image, VkFormat format, uint32_t arrayLayers) { VkImageView handle{}; const VkImageViewCreateInfo viewInfo{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, .image = image, - .viewType = VK_IMAGE_VIEW_TYPE_2D, + .viewType = arrayLayers == 1 ? VK_IMAGE_VIEW_TYPE_2D : VK_IMAGE_VIEW_TYPE_2D_ARRAY, .format = format, .subresourceRange = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .baseMipLevel = 0, .levelCount = 1, .baseArrayLayer = 0, - .layerCount = 1 + .layerCount = arrayLayers } }; auto res = vk.df().CreateImageView(vk.dev(), &viewInfo, VK_NULL_HANDLE, &handle); @@ -155,10 +156,13 @@ Image::Image(const vk::Vulkan& vk, VkFormat format, VkImageUsageFlags usage, std::optional importFd, - std::optional exportFd) : + std::optional exportFd, + uint32_t arrayLayers + ) : image(createImage(vk, extent, format, usage, - importFd.has_value() || exportFd.has_value() + importFd.has_value() || exportFd.has_value(), + arrayLayers )), memory(allocateMemory(vk, *this->image, @@ -166,7 +170,8 @@ Image::Image(const vk::Vulkan& vk, )), view(createImageView(vk, *this->image, - format + format, + arrayLayers )), extent(extent) { } diff --git a/lsfg-vk-layer/.clang-tidy b/lsfg-vk-layer/.clang-tidy index b2348d2..a3c1728 100644 --- a/lsfg-vk-layer/.clang-tidy +++ b/lsfg-vk-layer/.clang-tidy @@ -29,4 +29,3 @@ Checks: # Vulkan layers often require C-style memory access - -cppcoreguidelines-pro-bounds-pointer-arithmetic - -cppcoreguidelines-pro-type-union-access -- -clang-diagnostic-unsafe-buffer-usage diff --git a/lsfg-vk-layer/CMakeLists.txt b/lsfg-vk-layer/CMakeLists.txt index 0413479..f005c3b 100644 --- a/lsfg-vk-layer/CMakeLists.txt +++ b/lsfg-vk-layer/CMakeLists.txt @@ -10,7 +10,6 @@ target_link_libraries(lsfg-vk-layer PUBLIC lsfg-vk-backend) target_compile_options(lsfg-vk-layer PRIVATE - -Wno-unknown-warning-option -Wno-unsafe-buffer-usage) # Array indexing set_target_properties(lsfg-vk-layer PROPERTIES diff --git a/lsfg-vk-layer/src/entrypoint.cpp 
b/lsfg-vk-layer/src/entrypoint.cpp index 6256b9f..d04dd8b 100644 --- a/lsfg-vk-layer/src/entrypoint.cpp +++ b/lsfg-vk-layer/src/entrypoint.cpp @@ -22,17 +22,17 @@ using namespace lsfgvk::layer; namespace { - // global layer info initialized at layer negotiation + /// Global layer info initialized at layer negotiation struct LayerInfo { - std::unordered_map map; //!< function pointer override map + std::unordered_map map; //!< Function pointer override map PFN_vkGetInstanceProcAddr GetInstanceProcAddr; Root root; }* layer_info; // NOLINT (global variable) - // instance-wide info initialized at instance creation(s) + /// Instance-wide info initialized at instance creation(s) struct InstanceInfo { - std::vector handles; // there may be several instances + std::vector handles; // There may be several instances vk::VulkanInstanceFuncs funcs; std::unordered_map devices; @@ -40,44 +40,44 @@ namespace { std::unordered_map swapchainInfos; }* instance_info; // NOLINT (global variable) - // create instance + /// Create instance VkResult myvkCreateInstance( const VkInstanceCreateInfo* info, const VkAllocationCallbacks* alloc, VkInstance* instance) { - // apply layer chaining + // Apply layer chaining auto* layerInfo = reinterpret_cast(const_cast(info->pNext)); while (layerInfo && (layerInfo->sType != VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO || layerInfo->function != VK_LAYER_LINK_INFO)) { layerInfo = reinterpret_cast(const_cast(layerInfo->pNext)); } if (!layerInfo) { - std::cerr << "lsfg-vk: no layer info found in pNext chain, " + std::cerr << "lsfg-vk: No layer info found in pNext chain, " "the previous layer does not follow spec\n"; return VK_ERROR_INITIALIZATION_FAILED; } auto* linkInfo = layerInfo->u.pLayerInfo; if (!linkInfo) { - std::cerr << "lsfg-vk: link info is null, " + std::cerr << "lsfg-vk: Link info is null, " "the previous layer does not follow spec\n"; return VK_ERROR_INITIALIZATION_FAILED; } layer_info->GetInstanceProcAddr = 
linkInfo->pfnNextGetInstanceProcAddr; if (!layer_info->GetInstanceProcAddr) { - std::cerr << "lsfg-vk: next layer's vkGetInstanceProcAddr is null, " + std::cerr << "lsfg-vk: Next layer's vkGetInstanceProcAddr is null, " "the previous layer does not follow spec\n"; return VK_ERROR_INITIALIZATION_FAILED; } - layerInfo->u.pLayerInfo = linkInfo->pNext; // advance for next layer + layerInfo->u.pLayerInfo = linkInfo->pNext; // Advance for next layer - // create instance + // Create instance auto* vkCreateInstance = reinterpret_cast( layer_info->GetInstanceProcAddr(VK_NULL_HANDLE, "vkCreateInstance")); if (!vkCreateInstance) { - std::cerr << "lsfg-vk: failed to get next layer's vkCreateInstance, " + std::cerr << "lsfg-vk: Failed to get next layer's vkCreateInstance, " "the previous layer does not follow spec\n"; return VK_ERROR_INITIALIZATION_FAILED; } @@ -103,64 +103,64 @@ namespace { return VK_SUCCESS; } catch (const ls::vulkan_error& e) { if (e.error() == VK_ERROR_EXTENSION_NOT_PRESENT) - std::cerr << "lsfg-vk: required Vulkan instance extensions are not present. " + std::cerr << "lsfg-vk: Required Vulkan instance extensions are not present. 
" "Your GPU driver is not supported.\n"; return e.error(); } } - // create device + /// Create device VkResult myvkCreateDevice( VkPhysicalDevice physdev, const VkDeviceCreateInfo* info, const VkAllocationCallbacks* alloc, VkDevice* device) { - // apply layer chaining + // Apply layer chaining auto* layerInfo = reinterpret_cast(const_cast(info->pNext)); while (layerInfo && (layerInfo->sType != VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO || layerInfo->function != VK_LAYER_LINK_INFO)) { layerInfo = reinterpret_cast(const_cast(layerInfo->pNext)); } if (!layerInfo) { - std::cerr << "lsfg-vk: no layer info found in pNext chain, " + std::cerr << "lsfg-vk: No layer info found in pNext chain, " "the previous layer does not follow spec\n"; return VK_ERROR_INITIALIZATION_FAILED; } auto* linkInfo = layerInfo->u.pLayerInfo; if (!linkInfo) { - std::cerr << "lsfg-vk: link info is null, " + std::cerr << "lsfg-vk: Link info is null, " "the previous layer does not follow spec\n"; return VK_ERROR_INITIALIZATION_FAILED; } instance_info->funcs.GetDeviceProcAddr = linkInfo->pfnNextGetDeviceProcAddr; if (!linkInfo->pfnNextGetDeviceProcAddr) { - std::cerr << "lsfg-vk: next layer's vkGetDeviceProcAddr is null, " + std::cerr << "lsfg-vk: Next layer's vkGetDeviceProcAddr is null, " "the previous layer does not follow spec\n"; return VK_ERROR_INITIALIZATION_FAILED; } - layerInfo->u.pLayerInfo = linkInfo->pNext; // advance for next layer + layerInfo->u.pLayerInfo = linkInfo->pNext; // Advance for next layer - // fetch device loader functions + // Fetch device loader functions layerInfo = reinterpret_cast(const_cast(info->pNext)); while (layerInfo && (layerInfo->sType != VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO || layerInfo->function != VK_LOADER_DATA_CALLBACK)) { layerInfo = reinterpret_cast(const_cast(layerInfo->pNext)); } if (!layerInfo) { - std::cerr << "lsfg-vk: no layer loader data found in pNext chain.\n"; + std::cerr << "lsfg-vk: No layer loader data found in pNext chain.\n"; return 
VK_ERROR_INITIALIZATION_FAILED; } auto* setLoaderData = layerInfo->u.pfnSetDeviceLoaderData; if (!setLoaderData) { - std::cerr << "lsfg-vk: instance loader data function is null.\n"; + std::cerr << "lsfg-vk: Instance loader data function is null.\n"; return VK_ERROR_INITIALIZATION_FAILED; } - // create device + // Create device try { VkDeviceCreateInfo newInfo = *info; layer_info->root.modifyDeviceCreateInfo(newInfo, @@ -172,12 +172,12 @@ namespace { ); } catch (const ls::vulkan_error& e) { if (e.error() == VK_ERROR_EXTENSION_NOT_PRESENT) - std::cerr << "lsfg-vk: required Vulkan device extensions are not present. " + std::cerr << "lsfg-vk: Required Vulkan device extensions are not present. " "Your GPU driver is not supported.\n"; return e.error(); } - // create layer instance + // Create layer instance try { instance_info->devices.emplace( *device, @@ -189,25 +189,25 @@ namespace { ) ); } catch (const std::exception& e) { - std::cerr << "lsfg-vk: something went wrong during lsfg-vk initialization:\n"; + std::cerr << "lsfg-vk: Something went wrong during lsfg-vk initialization:\n"; std::cerr << "- " << e.what() << '\n'; } return VK_SUCCESS; } - // destroy device + /// Destroy device void myvkDestroyDevice(VkDevice device, const VkAllocationCallbacks* alloc) { - // destroy layer instance + // Destroy layer instance auto it = instance_info->devices.find(device); if (it != instance_info->devices.end()) instance_info->devices.erase(it); - // destroy device + // Destroy device auto vkDestroyDevice = reinterpret_cast( instance_info->funcs.GetDeviceProcAddr(device, "vkDestroyDevice")); if (!vkDestroyDevice) { - std::cerr << "lsfg-vk: failed to get next layer's vkDestroyDevice, " + std::cerr << "lsfg-vk: Failed to get next layer's vkDestroyDevice, " "the previous layer does not follow spec\n"; return; } @@ -215,24 +215,24 @@ namespace { vkDestroyDevice(device, alloc); } - // destroy instance + /// Destroy instance void myvkDestroyInstance(VkInstance instance, const 
VkAllocationCallbacks* alloc) { - // remove instance handle + // Remove instance handle auto it = std::ranges::find(instance_info->handles, instance); if (it != instance_info->handles.end()) instance_info->handles.erase(it); - // destroy instance info if no handles remain + // Destroy instance info if no handles remain if (instance_info->handles.empty()) { delete instance_info; // NOLINT (memory management) instance_info = nullptr; } - // destroy instance + // Destroy instance auto vkDestroyInstance = reinterpret_cast( layer_info->GetInstanceProcAddr(instance, "vkDestroyInstance")); if (!vkDestroyInstance) { - std::cerr << "lsfg-vk: failed to get next layer's vkDestroyInstance, " + std::cerr << "lsfg-vk: Failed to get next layer's vkDestroyInstance, " "the previous layer does not follow spec\n"; return; } @@ -240,7 +240,7 @@ namespace { vkDestroyInstance(instance, alloc); } - // get optional function pointer override + /// Get optional function pointer override PFN_vkVoidFunction getProcAddr(const std::string& name) { auto it = layer_info->map.find(name); if (it != layer_info->map.end()) @@ -248,7 +248,7 @@ namespace { return nullptr; } - // get instance-level function pointers + /// Get instance-level function pointers PFN_vkVoidFunction myvkGetInstanceProcAddr(VkInstance instance, const char* name) { if (!name) return nullptr; @@ -259,7 +259,7 @@ namespace { return layer_info->GetInstanceProcAddr(instance, name); } - // get device-level function pointers + /// Get device-level function pointers PFN_vkVoidFunction myvkGetDeviceProcAddr(VkDevice device, const char* name) { if (!name) return nullptr; @@ -282,7 +282,7 @@ namespace { return VK_ERROR_INITIALIZATION_FAILED; try { - // retire old swapchain + // Retire old swapchain if (info->oldSwapchain) { const auto& info_mapping = instance_info->swapchainInfos.find(info->oldSwapchain); if (info_mapping != instance_info->swapchainInfos.end()) @@ -295,9 +295,9 @@ namespace { 
layer_info->root.removeSwapchainContext(info->oldSwapchain); } - layer_info->root.update(); // ensure config is up to date + layer_info->root.update(); // Ensure config is up to date - // create swapchain + // Create swapchain VkSwapchainCreateInfoKHR newInfo = *info; layer_info->root.modifySwapchainCreateInfo(it->second, newInfo, [=, newInfo = &newInfo]() { @@ -308,7 +308,7 @@ namespace { } ); - // get all swapchain images + // Get all swapchain images uint32_t imageCount{}; auto res = it->second.df().GetSwapchainImagesKHR(device, *swapchain, &imageCount, VK_NULL_HANDLE); @@ -329,7 +329,7 @@ namespace { .presentMode = newInfo.presentMode }).first->second; - // create lsfg-vk swapchain + // Create lsfg-vk swapchain layer_info->root.createSwapchainContext(it->second, *swapchain, info); instance_info->swapchains.emplace(*swapchain, @@ -337,11 +337,11 @@ namespace { return res; } catch (const ls::vulkan_error& e) { - std::cerr << "lsfg-vk: something went wrong during lsfg-vk swapchain creation:\n"; + std::cerr << "lsfg-vk: Something went wrong during lsfg-vk swapchain creation:\n"; std::cerr << "- " << e.what() << '\n'; return e.error(); } catch (const std::exception& e) { - std::cerr << "lsfg-vk: something went wrong during lsfg-vk swapchain creation:\n"; + std::cerr << "lsfg-vk: Something went wrong during lsfg-vk swapchain creation:\n"; std::cerr << "- " << e.what() << '\n'; return VK_ERROR_INITIALIZATION_FAILED; } @@ -349,16 +349,15 @@ namespace { VkResult myvkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* info) { #pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wunknown-warning-option" #pragma clang diagnostic ignored "-Wunsafe-buffer-usage" VkResult result = VK_SUCCESS; - // ensure layer config is up to date + // Ensure layer config is up to date bool reload{}; try { reload = layer_info->root.update(); } catch (const std::exception&) { - reload = false; // ignore parse errors + reload = false; // Ignore parse errors } if (reload) { @@ 
-377,7 +376,7 @@ namespace { } } - // present each swapchain + // Present each swapchain for (size_t i = 0; i < info->swapchainCount; i++) { const auto& swapchain = info->pSwapchains[i]; @@ -403,7 +402,7 @@ namespace { if (e.error() != VK_ERROR_OUT_OF_DATE_KHR) { std::cerr << "lsfg-vk: something went wrong during lsfg-vk swapchain presentation:\n"; std::cerr << "- " << e.what() << '\n'; - } // silently swallow out-of-date errors + } // Silently swallow out-of-date errors result = e.error(); } catch (const std::exception& e) { @@ -438,7 +437,7 @@ namespace { layer_info->root.removeSwapchainContext(swapchain); - // destroy swapchain + // Destroy swapchain it->second.df().DestroySwapchainKHR(device, swapchain, alloc); } } @@ -446,13 +445,13 @@ namespace { /// Vulkan layer entrypoint __attribute__((visibility("default"))) VkResult vkNegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface* pVersionStruct) { - // ensure loader compatibility + // Ensure loader compatibility if (!pVersionStruct || pVersionStruct->sType != LAYER_NEGOTIATE_INTERFACE_STRUCT || pVersionStruct->loaderLayerInterfaceVersion < 2) return VK_ERROR_INITIALIZATION_FAILED; - // if the layer has already been initialized, skip + // If the layer has already been initialized, skip if (layer_info) { pVersionStruct->loaderLayerInterfaceVersion = 2; pVersionStruct->pfnGetPhysicalDeviceProcAddr = nullptr; @@ -461,7 +460,7 @@ VkResult vkNegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface* pVers return VK_SUCCESS; } - // load the layer configuration + // Load the layer configuration try { layer_info = new LayerInfo { // NOLINT (memory management) .map = { @@ -478,20 +477,20 @@ VkResult vkNegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface* pVers .root = Root() }; - if (!layer_info->root.active()) { // skip inactive + if (!layer_info->root.active()) { // Skip inactive delete layer_info; // NOLINT (memory management) layer_info = nullptr; return VK_ERROR_INITIALIZATION_FAILED; } } 
catch (const std::exception& e) { - std::cerr << "lsfg-vk: something went wrong during lsfg-vk layer initialization:\n"; + std::cerr << "lsfg-vk: Something went wrong during lsfg-vk layer initialization:\n"; std::cerr << "- " << e.what() << '\n'; return VK_ERROR_INITIALIZATION_FAILED; } - // emplace function pointers/version + // Emplace function pointers/version pVersionStruct->loaderLayerInterfaceVersion = 2; pVersionStruct->pfnGetPhysicalDeviceProcAddr = nullptr; pVersionStruct->pfnGetDeviceProcAddr = myvkGetDeviceProcAddr; diff --git a/lsfg-vk-layer/src/instance.cpp b/lsfg-vk-layer/src/instance.cpp index 00da4d1..53989e4 100644 --- a/lsfg-vk-layer/src/instance.cpp +++ b/lsfg-vk-layer/src/instance.cpp @@ -2,10 +2,10 @@ #include "instance.hpp" #include "lsfg-vk-common/helpers/paths.hpp" -#include "swapchain.hpp" #include "lsfg-vk-common/configuration/detection.hpp" #include "lsfg-vk-common/helpers/errors.hpp" #include "lsfg-vk-common/vulkan/vulkan.hpp" +#include "swapchain.hpp" #include #include @@ -25,7 +25,7 @@ using namespace lsfgvk; using namespace lsfgvk::layer; namespace { - /// helper function to add required extensions + /// Helper function to add required extensions std::vector add_extensions(const char* const* existingExtensions, size_t count, const std::vector& requiredExtensions) { std::vector extensions(count); @@ -45,14 +45,14 @@ namespace { } Root::Root() { - // find active profile + // Find active profile const auto& profile = findProfile(this->config.get(), ls::identify()); if (!profile.has_value()) return; this->active_profile = profile->second; - std::cerr << "lsfg-vk: using profile with name '" << this->active_profile->name << "' "; + std::cerr << "lsfg-vk: Using profile with name '" << this->active_profile->name << "' "; switch (profile->first) { case ls::IdentType::OVERRIDE: std::cerr << "(identified via override)\n"; @@ -167,10 +167,10 @@ void Root::modifySwapchainCreateInfo(const vk::Vulkan& vk, VkSwapchainCreateInfo void 
Root::createSwapchainContext(const vk::Vulkan& vk, VkSwapchainKHR swapchain, const SwapchainInfo& info) { if (!this->active_profile.has_value()) - throw ls::error("attempted to create swapchain context while layer is inactive"); + throw ls::error("Attempted to create swapchain context while layer is inactive"); const auto& profile = *this->active_profile; - if (!this->backend.has_value()) { // emplace backend late, due to loader bug + if (!this->backend.has_value()) { // Emplace backend late, due to loader bug const auto& global = this->config.get().global(); setenv("DISABLE_LSFGVK", "1", 1); @@ -183,23 +183,12 @@ void Root::createSwapchainContext(const vk::Vulkan& vk, dll = ls::findShaderDll(); this->backend.emplace( - [gpu = profile.gpu]( - const std::string& deviceName, - std::pair ids, - const std::optional& pci - ) { - if (!gpu) - return true; - - return (deviceName == *gpu) - || (ids.first + ":" + ids.second == *gpu) - || (pci && *pci == *gpu); - }, + profile.gpu.value_or(""), dll, global.allow_fp16 ); } catch (const std::exception& e) { unsetenv("DISABLE_LSFGVK"); - throw ls::error("failed to create backend instance", e); + throw ls::error("Failed to create backend instance", e); } unsetenv("DISABLE_LSFGVK"); diff --git a/lsfg-vk-layer/src/instance.hpp b/lsfg-vk-layer/src/instance.hpp index 35083b6..57d41de 100644 --- a/lsfg-vk-layer/src/instance.hpp +++ b/lsfg-vk-layer/src/instance.hpp @@ -2,13 +2,14 @@ #pragma once -#include "lsfg-vk-backend/lsfgvk.hpp" +#include "lsfg-vk/lsfgvk.hpp" #include "lsfg-vk-common/configuration/config.hpp" #include "lsfg-vk-common/helpers/errors.hpp" #include "lsfg-vk-common/helpers/pointers.hpp" #include "lsfg-vk-common/vulkan/vulkan.hpp" #include "swapchain.hpp" +#include #include #include @@ -16,49 +17,75 @@ namespace lsfgvk::layer { - /// root context of the lsfg-vk layer + /// + /// Root context of the lsfg-vk layer + /// class Root { public: - /// create the lsfg-vk root context + /// + /// Create the lsfg-vk root context 
+ /// /// @throws ls::error on failure + /// Root(); - /// check if the layer is active - /// @return true if active + /// + /// Check if the layer is active + /// + /// @return true If active + /// [[nodiscard]] bool active() const { return this->active_profile.has_value(); } - /// ensure the layer is up-to-date - /// @return true if the configuration was updated + /// + /// Ensure the layer is up-to-date + /// + /// @return true If the configuration was updated + /// bool update(); - /// modify instance create info - /// @param createInfo original create info - /// @param finish function to call after modification + /// + /// Modify instance create info + /// + /// @param createInfo Original create info + /// @param finish Function to call after modification + /// void modifyInstanceCreateInfo(VkInstanceCreateInfo& createInfo, const std::function& finish) const; - /// modify device create info - /// @param createInfo original create info - /// @param finish function to call after modification + /// + /// Modify device create info + /// + /// @param createInfo Original create info + /// @param finish Function to call after modification + /// void modifyDeviceCreateInfo(VkDeviceCreateInfo& createInfo, const std::function& finish) const; - - /// modify swapchain create info - /// @param vk vulkan instance - /// @param createInfo original create info - /// @param finish function to call after modification + /// + /// Modify swapchain create info + /// + /// @param vk Vulkan instance + /// @param createInfo Original create info + /// @param finish Function to call after modification + /// void modifySwapchainCreateInfo(const vk::Vulkan& vk, VkSwapchainCreateInfoKHR& createInfo, const std::function& finish) const; - /// create swapchain context - /// @param vk vulkan instance - /// @param swapchain swapchain handle - /// @param info swapchain info + + /// + /// Create swapchain context + /// + /// @param vk Vulkan instance + /// @param swapchain Swapchain handle + /// 
@param info Swapchain info /// @throws ls::error on failure + /// void createSwapchainContext(const vk::Vulkan& vk, VkSwapchainKHR swapchain, const SwapchainInfo& info); - /// get swapchain context - /// @param swapchain swapchain handle - /// @return swapchain context + /// + /// Get swapchain context + /// + /// @param swapchain Swapchain handle + /// @return Swapchain context /// @throws ls::error if not found + /// [[nodiscard]] Swapchain& getSwapchainContext(VkSwapchainKHR swapchain) { const auto& it = this->swapchains.find(swapchain); if (it == this->swapchains.end()) @@ -66,14 +93,17 @@ namespace lsfgvk::layer { return it->second; } - /// remove swapchain context - /// @param swapchain swapchain handle + /// + /// Remove swapchain context + /// + /// @param swapchain Swapchain handle + /// void removeSwapchainContext(VkSwapchainKHR swapchain); private: ls::WatchedConfig config; std::optional active_profile; - ls::lazy backend; + ls::lazy backend; std::unordered_map swapchains; }; diff --git a/lsfg-vk-layer/src/swapchain.cpp b/lsfg-vk-layer/src/swapchain.cpp index 85033ae..b56d541 100644 --- a/lsfg-vk-layer/src/swapchain.cpp +++ b/lsfg-vk-layer/src/swapchain.cpp @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-3.0-or-later */ #include "swapchain.hpp" -#include "lsfg-vk-backend/lsfgvk.hpp" +#include "lsfg-vk/lsfgvk.hpp" #include "lsfg-vk-common/configuration/config.hpp" #include "lsfg-vk-common/helpers/errors.hpp" #include "lsfg-vk-common/helpers/pointers.hpp" @@ -10,11 +10,10 @@ #include "lsfg-vk-common/vulkan/semaphore.hpp" #include "lsfg-vk-common/vulkan/vulkan.hpp" -#include #include #include #include -#include +#include #include #include #include @@ -25,6 +24,7 @@ using namespace lsfgvk; using namespace lsfgvk::layer; namespace { + /// Barrier helper VkImageMemoryBarrier barrierHelper(VkImage handle, VkAccessFlags srcAccessMask, VkAccessFlags dstAccessMask, @@ -66,66 +66,45 @@ void layer::context_ModifySwapchainCreateInfo(const ls::GameConf& profile, uint3
} } -Swapchain::Swapchain(const vk::Vulkan& vk, backend::Instance& backend, +Swapchain::Swapchain(const vk::Vulkan& vk, lsfgvk::Instance& backend, ls::GameConf profile, SwapchainInfo info) : instance(backend), profile(std::move(profile)), info(std::move(info)) { const VkExtent2D extent = this->info.extent; - const bool hdr = this->info.format > 57; - - std::vector sourceFds(2); - std::vector destinationFds(this->profile.multiplier - 1); - - this->sourceImages.reserve(sourceFds.size()); - for (int& fd : sourceFds) - this->sourceImages.emplace_back(vk, - extent, hdr ? VK_FORMAT_R16G16B16A16_SFLOAT : VK_FORMAT_R8G8B8A8_UNORM, - VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, - std::nullopt, &fd); - - this->destinationImages.reserve(destinationFds.size()); - for (int& fd : destinationFds) - this->destinationImages.emplace_back(vk, - extent, hdr ? VK_FORMAT_R16G16B16A16_SFLOAT : VK_FORMAT_R8G8B8A8_UNORM, - VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, - std::nullopt, &fd); - - int syncFd{}; - this->syncSemaphore.emplace(vk, 0, std::nullopt, &syncFd); try { - this->ctx = ls::owned_ptr>( - new ls::R(backend.openContext( - { sourceFds.at(0), sourceFds.at(1) }, destinationFds, syncFd, - extent.width, extent.height, - hdr, 1.0F / this->profile.flow_scale, this->profile.performance_mode - )), - [backend = &backend](ls::R& ctx) { - backend->closeContext(ctx); - } + this->ctx = std::make_unique( + backend, + extent.width, extent.height, + this->profile.flow_scale, + this->profile.performance_mode ); - - backend::makeLeaking(); // don't worry about it :3 + this->total = static_cast(this->profile.multiplier) - 1; } catch (const std::exception& e) { - throw ls::error("failed to create swapchain context", e); + throw ls::error("Failed to create swapchain context", e); } + const auto exportedFds = this->ctx->exportFds(); + this->sourceImage.emplace(vk, + extent, VK_FORMAT_R8G8B8A8_UNORM, + VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + 
exportedFds.sourceFd, std::nullopt, 2); + this->destinationImage.emplace(vk, + extent, VK_FORMAT_R8G8B8A8_UNORM, + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + exportedFds.destinationFd); + this->syncSemaphore.emplace(vk, 0, exportedFds.syncFd); + this->renderCommandBuffer.emplace(vk); this->renderFence.emplace(vk); - for (size_t i = 0; i < this->destinationImages.size(); i++) { + this->finalSemaphore.emplace(vk); + for (size_t i = 0; i < this->total; i++) { this->passes.emplace_back(RenderPass { .commandBuffer = vk::CommandBuffer(vk), - .acquireSemaphore = vk::Semaphore(vk) + .acquireSemaphore = vk::Semaphore(vk), + .copySemaphore = vk::Semaphore(vk) }); } - - const size_t frames = std::max(this->info.images.size(), this->destinationImages.size() + 2); - for (size_t i = 0; i < frames; i++) { - this->postCopySemaphores.emplace_back( - vk::Semaphore(vk), - vk::Semaphore(vk) - ); - } } VkResult Swapchain::present(const vk::Vulkan& vk, @@ -133,19 +112,18 @@ VkResult Swapchain::present(const vk::Vulkan& vk, void* next_chain, uint32_t imageIdx, const std::vector& semaphores) { const auto& swapchainImage = this->info.images.at(imageIdx); - const auto& sourceImage = this->sourceImages.at(this->fidx % 2); + const auto sourceImageIdx{static_cast(this->iteration) % 2}; - // schedule frame generation + // Schedule frame generation try { - this->instance.get().scheduleFrames(this->ctx.get()); + this->ctx->dispatch(this->total); } catch (const std::exception& e) { - throw ls::error("failed to schedule frames", e); + throw ls::error("Failed to schedule frames", e); } - // update present mode when not using pacing + // Update present mode when not using pacing if (this->profile.pacing == ls::Pacing::None) { #pragma clang diagnostic push -#pragma clang diagnostic ignored "-Wunknown-warning-option" #pragma clang diagnostic ignored "-Wunsafe-buffer-usage" auto* info = reinterpret_cast(next_chain); while (info) { @@ -160,12 +138,12 @@ VkResult 
Swapchain::present(const vk::Vulkan& vk, #pragma clang diagnostic pop } - // wait for completion of previous frame - if (this->fidx && !this->renderFence->wait(vk, 150ULL * 1000 * 1000)) + // Wait for completion of previous frame + if (this->iteration && !this->renderFence->wait(vk, 150ULL * 1000 * 1000)) throw ls::vulkan_error(VK_TIMEOUT, "vkWaitForFences() failed"); this->renderFence->reset(vk); - // copy swapchain image into backend source image + // Copy swapchain image into backend source image const auto& cmdbuf = *this->renderCommandBuffer; cmdbuf.begin(vk); @@ -177,15 +155,15 @@ VkResult Swapchain::present(const vk::Vulkan& vk, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL ), - barrierHelper(sourceImage.handle(), + barrierHelper(this->sourceImage->handle(), VK_ACCESS_NONE, VK_ACCESS_TRANSFER_WRITE_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL ), }, - { swapchainImage, sourceImage.handle() }, - sourceImage.getExtent(), + { swapchainImage, this->sourceImage->handle() }, + this->sourceImage->getExtent(), { barrierHelper(swapchainImage, VK_ACCESS_TRANSFER_READ_BIT, @@ -193,39 +171,40 @@ VkResult Swapchain::present(const vk::Vulkan& vk, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR ), - } + }, + 0, sourceImageIdx ); cmdbuf.end(vk); + cmdbuf.submit(vk, semaphores, VK_NULL_HANDLE, 0, - {}, this->syncSemaphore->handle(), this->idx++ + {}, this->syncSemaphore->handle(), this->syncValue ); - for (size_t i = 0; i < this->destinationImages.size(); i++) { - auto& pcs = this->postCopySemaphores.at(this->idx % this->postCopySemaphores.size()); - auto& destinationImage = this->destinationImages.at(i); - auto& pass = this->passes.at(i); + for (size_t i = 0; i < this->passes.size(); i++) { + auto& pass{this->passes.at(i)}; + const bool last{i == (this->passes.size() - 1)}; - // acquire swapchain image - uint32_t aqImageIdx{}; + // Acquire swapchain image + uint32_t swapchainImageIdx{}; auto res = 
vk.df().AcquireNextImageKHR(vk.dev(), swapchain, UINT64_MAX, pass.acquireSemaphore.handle(), VK_NULL_HANDLE, - &aqImageIdx + &swapchainImageIdx ); if (res != VK_SUCCESS && res != VK_SUBOPTIMAL_KHR) throw ls::vulkan_error(res, "vkAcquireNextImageKHR() failed"); - const auto& aquiredSwapchainImage = this->info.images.at(aqImageIdx); + const auto& aquiredSwapchainImage = this->info.images.at(swapchainImageIdx); - // copy backend destination image into swapchain image + // Copy backend destination image into swapchain image auto& cmdbuf = pass.commandBuffer; cmdbuf.begin(vk); cmdbuf.blitImage(vk, { - barrierHelper(destinationImage.handle(), + barrierHelper(this->destinationImage->handle(), VK_ACCESS_NONE, VK_ACCESS_TRANSFER_READ_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, @@ -238,8 +217,8 @@ VkResult Swapchain::present(const vk::Vulkan& vk, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL ), }, - { destinationImage.handle(), aquiredSwapchainImage }, - destinationImage.getExtent(), + { this->destinationImage->handle(), aquiredSwapchainImage }, + this->destinationImage->getExtent(), { barrierHelper(aquiredSwapchainImage, VK_ACCESS_TRANSFER_WRITE_BIT, @@ -250,48 +229,43 @@ VkResult Swapchain::present(const vk::Vulkan& vk, } ); - std::vector waitSemaphores{ pass.acquireSemaphore.handle() }; - if (i) { // non-first pass - const auto& prevPCS = this->postCopySemaphores.at((this->idx - 1) % this->postCopySemaphores.size()); - waitSemaphores.push_back(prevPCS.second.handle()); - } - - const std::vector signalSemaphores{ - pcs.first.handle(), - pcs.second.handle() - }; - cmdbuf.end(vk); + + std::vector signalSemaphores{ pass.copySemaphore.handle() }; + if (last) + signalSemaphores.push_back(this->finalSemaphore->handle()); + + this->syncValue++; + cmdbuf.submit(vk, - waitSemaphores, this->syncSemaphore->handle(), this->idx, - signalSemaphores, VK_NULL_HANDLE, 0, - i == this->destinationImages.size() - 1 ? 
this->renderFence->handle() : VK_NULL_HANDLE + { pass.acquireSemaphore.handle() }, this->syncSemaphore->handle(), this->syncValue, + signalSemaphores, last ? nullptr : this->syncSemaphore->handle(), this->syncValue + 1, + last ? this->renderFence->handle() : VK_NULL_HANDLE ); - // present swapchain image + this->syncValue++; + + // Present swapchain image const VkPresentInfoKHR presentInfo{ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, .pNext = i ? nullptr : next_chain, .waitSemaphoreCount = 1, - .pWaitSemaphores = &pcs.first.handle(), + .pWaitSemaphores = &pass.copySemaphore.handle(), .swapchainCount = 1, .pSwapchains = &swapchain, - .pImageIndices = &aqImageIdx, + .pImageIndices = &swapchainImageIdx, }; res = vk.df().QueuePresentKHR(queue, &presentInfo); if (res != VK_SUCCESS && res != VK_SUBOPTIMAL_KHR) throw ls::vulkan_error(res, "vkQueuePresentKHR() failed"); - - this->idx++; } - // present original swapchain image - auto& lastPCS = this->postCopySemaphores.at((this->idx - 1) % this->postCopySemaphores.size()); + // Present original swapchain image const VkPresentInfoKHR presentInfo{ .sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR, .waitSemaphoreCount = 1, - .pWaitSemaphores = &lastPCS.second.handle(), + .pWaitSemaphores = &this->finalSemaphore->handle(), .swapchainCount = 1, .pSwapchains = &swapchain, .pImageIndices = &imageIdx, @@ -300,6 +274,7 @@ VkResult Swapchain::present(const vk::Vulkan& vk, if (res != VK_SUCCESS && res != VK_SUBOPTIMAL_KHR) throw ls::vulkan_error(res, "vkQueuePresentKHR() failed"); - this->fidx++; + this->iteration++; + return res; } diff --git a/lsfg-vk-layer/src/swapchain.hpp b/lsfg-vk-layer/src/swapchain.hpp index 3c711a0..4893a0a 100644 --- a/lsfg-vk-layer/src/swapchain.hpp +++ b/lsfg-vk-layer/src/swapchain.hpp @@ -2,7 +2,7 @@ #pragma once -#include "lsfg-vk-backend/lsfgvk.hpp" +#include "lsfg-vk/lsfgvk.hpp" #include "lsfg-vk-common/configuration/config.hpp" #include "lsfg-vk-common/helpers/pointers.hpp" #include 
"lsfg-vk-common/vulkan/command_buffer.hpp" @@ -12,15 +12,18 @@ #include "lsfg-vk-common/vulkan/timeline_semaphore.hpp" #include "lsfg-vk-common/vulkan/vulkan.hpp" +#include #include -#include +#include #include #include namespace lsfgvk::layer { - /// swapchain info struct + /// + /// Swapchain info struct + /// struct SwapchainInfo { std::vector images; VkFormat format; @@ -29,53 +32,67 @@ namespace lsfgvk::layer { VkPresentModeKHR presentMode; }; - /// modify the swapchain create info based on the profile pre-swapchain creation - /// @param profile active game profile - /// @param maxImages maximum number of images supported by the surface - /// @param createInfo swapchain create info to modify + /// + /// Modify the swapchain create info based on the profile pre-swapchain creation + /// + /// @param profile Active game profile + /// @param maxImages Maximum number of images supported by the surface + /// @param createInfo Swapchain create info to modify + /// void context_ModifySwapchainCreateInfo(const ls::GameConf& profile, uint32_t maxImages, VkSwapchainCreateInfoKHR& createInfo); - /// swapchain context for a layer instance + /// + /// Swapchain context for a layer instance + /// class Swapchain { public: - /// create a new swapchain context - /// @param vk vulkan instance + /// + /// Create a new swapchain context + /// + /// @param vk Vulkan instance /// @param backend lsfg-vk backend instance - /// @param profile active game profile - /// @param info swapchain info - Swapchain(const vk::Vulkan& vk, backend::Instance& backend, + /// @param profile Active game profile + /// @param info Swapchain info + /// + Swapchain(const vk::Vulkan& vk, lsfgvk::Instance& backend, ls::GameConf profile, SwapchainInfo info); - /// present a frame - /// @param vk vulkan instance - /// @param queue presentation queue - /// @param next_chain next chain pointer for the present info (WARN: shared!) 
- /// @param imageIdx swapchain image index to present to - /// @param semaphores semaphores to wait on before presenting - /// @throws ls::vulkan_error on vulkan errors + /// + /// Present a frame + /// + /// @param vk Vulkan instance + /// @param queue Presentation queue + /// @param next_chain next chain pointer for the present info (WARNING: shared!) + /// @param imageIdx Swapchain image index to present to + /// @param semaphores Semaphores to wait on before presenting + /// @throws ls::vulkan_error on vulkan error + /// VkResult present(const vk::Vulkan& vk, VkQueue queue, VkSwapchainKHR swapchain, void* next_chain, uint32_t imageIdx, const std::vector& semaphores); private: - std::vector sourceImages; - std::vector destinationImages; + ls::lazy sourceImage; + ls::lazy destinationImage; ls::lazy syncSemaphore; ls::lazy renderCommandBuffer; ls::lazy renderFence; + ls::lazy finalSemaphore; struct RenderPass { vk::CommandBuffer commandBuffer; vk::Semaphore acquireSemaphore; + vk::Semaphore copySemaphore; }; std::vector passes; - std::vector> postCopySemaphores; - ls::R instance; - ls::owned_ptr> ctx; - size_t idx{1}; - size_t fidx{0}; // real frame index + ls::R instance; + std::unique_ptr ctx; + uint32_t total{}; + + size_t iteration{0}; + size_t syncValue{1}; ls::GameConf profile; SwapchainInfo info; diff --git a/lsfg-vk-ui/.clang-tidy b/lsfg-vk-ui/.clang-tidy index 042dd46..33a11dd 100644 --- a/lsfg-vk-ui/.clang-tidy +++ b/lsfg-vk-ui/.clang-tidy @@ -25,3 +25,5 @@ Checks: - -portability-avoid-pragma-once # Qt requires use of raw pointers in many places - -cppcoreguidelines-owning-memory +# Qt seems to break some ranges algorithms in GCC +- -modernize-use-ranges diff --git a/lsfg-vk-ui/CMakeLists.txt b/lsfg-vk-ui/CMakeLists.txt index edd23c1..335c516 100644 --- a/lsfg-vk-ui/CMakeLists.txt +++ b/lsfg-vk-ui/CMakeLists.txt @@ -28,15 +28,13 @@ set_target_properties(lsfg-vk-ui PROPERTIES AUTOUIC ON) target_compile_options(lsfg-vk-ui PRIVATE # QT-codegen warnings - 
-Wno-unknown-warning-option -Wno-ctad-maybe-unsupported -Wno-unsafe-buffer-usage-in-libc-call - -Wno-global-constructors - -Wno-unsafe-buffer-usage) + -Wno-unsafe-buffer-usage + -Wno-global-constructors) target_link_libraries(lsfg-vk-ui PRIVATE lsfg-vk-common - PRIVATE lsfg-vk-backend PRIVATE Qt6::Quick) install(TARGETS lsfg-vk-ui diff --git a/lsfg-vk-ui/rsc/UI.qml b/lsfg-vk-ui/rsc/UI.qml index 5c5a96c..aa79ff9 100644 --- a/lsfg-vk-ui/rsc/UI.qml +++ b/lsfg-vk-ui/rsc/UI.qml @@ -145,12 +145,12 @@ ApplicationWindow { GroupEntry { title: "Path to Lossless Scaling" - description: "Change the location of Lossless.dll" + description: "Change the location of lsfg-vk.dll" FileEdit { Layout.fillWidth: true - title: "Select Lossless.dll" + title: "Select lsfg-vk.dll" filter: "Dynamic Link Library Files (*.dll)" text: backend.dll diff --git a/lsfg-vk-ui/src/backend.cpp b/lsfg-vk-ui/src/backend.cpp index 646ba25..e19c579 100644 --- a/lsfg-vk-ui/src/backend.cpp +++ b/lsfg-vk-ui/src/backend.cpp @@ -19,7 +19,7 @@ using namespace lsfgvk; using namespace lsfgvk::ui; Backend::Backend() { - // load configuration + // Load existing configuration ls::ConfigFile config{}; auto path = ls::findConfigurationFile(); @@ -27,7 +27,8 @@ Backend::Backend() { try { config = ls::ConfigFile(path); } catch (const std::exception&) { - std::cerr << "the configuration file is invalid, it has been backed up to '.old'\n"; + std::cerr << "The existing configuration file is invalid, " + << "it has been backed up to '.old'\n"; std::filesystem::rename(path, path.string() + ".old"); } } @@ -35,17 +36,17 @@ Backend::Backend() { this->m_global = config.global(); this->m_profiles = config.profiles(); - // create gpu list - this->m_gpu_list = ui::getAvailableGPUs(); + // Create gpu list + this->m_gpu_list = ui::queryGPUs(); - // create profile list model + // Create profile list model QStringList profiles; for (const auto& profile : this->m_profiles) profiles.append(QString::fromStdString(profile.name)); 
this->m_profile_list_model = new QStringListModel(profiles, this); - // create active_in list models + // Create active_in list models this->m_active_in_list_models.reserve(this->m_profiles.size()); for (const auto& profile : this->m_profiles) { QStringList active_in; @@ -55,11 +56,11 @@ Backend::Backend() { this->m_active_in_list_models.push_back(new QStringListModel(active_in, this)); } - // try to select first profile + // Try to select first profile if (!this->m_profiles.empty()) this->m_profile_index = 0; - // spawn saving thread + // Spawn saving thread std::thread([this, path]() { while (true) { std::this_thread::sleep_for(std::chrono::milliseconds(500)); @@ -74,10 +75,10 @@ Backend::Backend() { try { std::filesystem::create_directories(path.parent_path()); if (!std::filesystem::exists(path.parent_path())) - throw ls::error("unable to create configuration directory"); + throw ls::error("Unable to create configuration directory"); config.write(path); } catch (const std::exception& e) { - std::cerr << "unable to write configuration:\n- " << e.what() << "\n"; + std::cerr << "Unable to write configuration:\n- " << e.what() << "\n"; } } }).detach(); diff --git a/lsfg-vk-ui/src/backend.hpp b/lsfg-vk-ui/src/backend.hpp index 9eed642..7c35818 100644 --- a/lsfg-vk-ui/src/backend.hpp +++ b/lsfg-vk-ui/src/backend.hpp @@ -9,14 +9,18 @@ #include "lsfg-vk-common/configuration/config.hpp" #include +#include +#include +#include #include +#include #define getters public #define setters public namespace lsfgvk::ui { - /// Class tying ui and configuration together + /// Class tying UI and Configuration together class Backend : public QObject { Q_OBJECT diff --git a/lsfg-vk-ui/src/utils.cpp b/lsfg-vk-ui/src/utils.cpp index 0866b48..f5371e2 100644 --- a/lsfg-vk-ui/src/utils.cpp +++ b/lsfg-vk-ui/src/utils.cpp @@ -5,63 +5,100 @@ #include #include "utils.hpp" -#include "lsfg-vk-backend/lsfgvk.hpp" -#include -#include -#include +#include +#include +#include +#include #include 
-#include +#include #include +#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1 +#define VULKAN_HPP_NO_DEFAULT_DISPATCHER 1 +#define VULKAN_HPP_NO_CONSTRUCTORS 1 +#include + using namespace lsfgvk; using namespace lsfgvk::ui; -QStringList ui::getAvailableGPUs() { - // list of found GPUs and their optional PCI IDs - std::vector>> gpus{}; +QStringList ui::queryGPUs() { + // Create a Vulkan instance + vk::detail::DispatchLoaderDynamic dld; + dld.init(); - // create a backend to query all GPUs - try { - const backend::DevicePicker picker{[&gpus]( - const std::string& deviceName, - std::pair, - const std::optional& pci - ) { - gpus.emplace_back(deviceName, pci); - return false; // always fail - }}; + const vk::ApplicationInfo appInfo{ + .pApplicationName = "lsfg-vk-ui", + .applicationVersion = vk::makeVersion(2, 0, 0), + .pEngineName = "lsfg-vk-ui", + .engineVersion = vk::makeVersion(2, 0, 0), + .apiVersion = vk::ApiVersion12 // Required by lsfg-vk anyways + }; + const vk::InstanceCreateInfo instanceInfo{ + .pApplicationInfo = &appInfo + }; + const vk::UniqueInstance instance{vk::createInstanceUnique(instanceInfo, nullptr, dld)}; + dld.init(*instance); - const backend::Instance instance{picker, "/non/existent/path", false}; - throw std::runtime_error("???"); - } catch (const backend::error&) { // NOLINT (empty catch) - // expected + // Query physical devices + std::vector devicesByName{}; + std::vector devicesByBusId{}; + + for (const auto& physdev : instance->enumeratePhysicalDevices(dld)) { + // Check for VK_EXT_pci_bus_info + bool supportsPCIEXT{false}; + for (const auto& ext : physdev.enumerateDeviceExtensionProperties(nullptr, dld)) { + if (std::string(ext.extensionName) != vk::EXTPciBusInfoExtensionName) + continue; + + supportsPCIEXT = true; + break; + } + + // Fetch properties + vk::PhysicalDevicePCIBusInfoPropertiesEXT busInfo{}; + vk::PhysicalDeviceProperties2 info{ + .pNext = supportsPCIEXT ? 
&busInfo : nullptr + }; + physdev.getProperties2(&info, dld); + + auto& props{info.properties}; + + // Append device name + props.deviceName.back() = '\0'; // Ensure null-termination + devicesByName.emplace_back(props.deviceName); + + // Append PCI bus ID (left empty when unavailable so both lists stay index-aligned) + std::ostringstream pciss; + if (supportsPCIEXT) { + pciss << std::hex << std::setfill('0') + << std::setw(4) << busInfo.pciDomain << ":" + << std::setw(2) << busInfo.pciBus << ":" + << std::setw(2) << busInfo.pciDevice << "." + << std::setw(1) << busInfo.pciFunction; + } + + devicesByBusId.emplace_back(pciss.str()); } - // NOLINTBEGIN (ranges) [GCC has some issues with ranges] - // first remove 1:1 duplicates - std::sort(gpus.begin(), gpus.end()); - gpus.erase(std::unique(gpus.begin(), gpus.end()), gpus.end()); - // NOLINTEND + // Count duplicate names + std::unordered_map repeats{}; + for (const auto& name : devicesByName) + repeats[name]++; - // build the frontend list + // Build the frontend list QStringList list{"Default"}; - for (const auto& gpu : gpus) { - // check if GPU is in list more than once - auto count = std::count_if(gpus.begin(), gpus.end(), - [&gpu](const auto& other) { - return other.first == gpu.first; - } - ); + for (size_t i = 0; i < devicesByName.size(); i++) { + const auto& name{devicesByName.at(i)}; - // add pci id to distinguish, otherwise add just the name + // Show the PCI bus ID for repeated names when one is known, else the device name QString entry; - if (count > 1 && gpu.second.has_value()) - entry = QString::fromStdString(*gpu.second); + if (repeats[name] > 1 && !devicesByBusId.at(i).empty()) + entry = QString::fromStdString(devicesByBusId.at(i)); else - entry = QString::fromStdString(gpu.first); + entry = QString::fromStdString(name); - // ensure no duplicates (flatpak does funny things) + // Append to list if not already present (flatpak does funny things) if (list.contains(entry)) continue; list.append(entry); diff --git a/lsfg-vk-ui/src/utils.hpp b/lsfg-vk-ui/src/utils.hpp index 4ccb312..c523a46 100644 --- 
a/lsfg-vk-ui/src/utils.hpp +++ b/lsfg-vk-ui/src/utils.hpp @@ -6,9 +6,11 @@ namespace lsfgvk::ui { - /// get the list of available GPUs, automatically - /// switching to PCI IDs if there are duplicates - /// @return list of available GPUs - QStringList getAvailableGPUs(); + /// + /// Query all GPUs available on the system. + /// + /// @return List of available GPUs + /// + QStringList queryGPUs(); }