mirror of
https://github.com/PancakeTAS/lsfg-vk.git
synced 2026-05-10 11:11:40 +00:00
Merge eff5db14c3 into 218820e8dc
This commit is contained in:
commit
d4b41c9008
84 changed files with 5585 additions and 3221 deletions
|
|
@ -6,14 +6,14 @@ include(GNUInstallDirs)
|
|||
option(LSFGVK_BUILD_VK_LAYER "Build the Vulkan layer" ON)
|
||||
option(LSFGVK_BUILD_UI "Build the user interface" OFF)
|
||||
option(LSFGVK_BUILD_CLI "Build the command line interface" ON)
|
||||
option(LSFGVK_INSTALL_DEVELOP "Install development libraries and headers" OFF)
|
||||
option(LSFGVK_INSTALL_LIBRARIES "Install development libraries and headers" OFF)
|
||||
option(LSFGVK_INSTALL_XDG_FILES "Install the application icon and desktop files" OFF)
|
||||
set(LSFGVK_LAYER_LIBRARY_PATH liblsfg-vk-layer.so CACHE STRING "Change where Vulkan searches for the layer library")
|
||||
option(LSFGVK_TESTING_RENDERDOC "Enable RenderDoc integration for testing purposes" OFF)
|
||||
# === READ HERE FOR BUILD OPTIONS ===
|
||||
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
|
||||
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
set(CMAKE_SKIP_RPATH ON)
|
||||
|
|
@ -29,24 +29,20 @@ if(CMAKE_BUILD_TYPE STREQUAL "Debug")
|
|||
add_compile_options(
|
||||
# By default, enable all warnings
|
||||
-Weverything
|
||||
-Wno-unknown-warning-option
|
||||
# Some warnings are incompatible with each other
|
||||
-Wno-pre-c++20-compat-pedantic
|
||||
-Wno-c++98-compat-pedantic
|
||||
-Wno-switch-default
|
||||
# Then there's code-style things I don't care about
|
||||
-Wno-missing-designated-field-initializers
|
||||
-Wno-shadow
|
||||
-Wno-unused-macros
|
||||
# And functional warnings I don't care about either
|
||||
-Wno-cast-function-type-strict
|
||||
-Wno-padded
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(LSFGVK_TESTING_RENDERDOC)
|
||||
add_compile_definitions(LSFGVK_TESTING_RENDERDOC)
|
||||
endif()
|
||||
|
||||
add_subdirectory(lsfg-vk-common)
|
||||
add_subdirectory(lsfg-vk-backend)
|
||||
if(LSFGVK_BUILD_VK_LAYER)
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ However, lsfg-vk provides several CMake options to customize the build process:
|
|||
- `LSFGVK_BUILD_VK_LAYER`: Set to `On` to build the Vulkan layer (default is `On`).
|
||||
- `LSFGVK_BUILD_UI`: Set to `On` to build the user interface (default is `Off`).
|
||||
- `LSFGVK_BUILD_CLI`: Set to `On` to build the command-line interface (default is `On`).
|
||||
- `LSFGVK_INSTALL_DEVELOP`: Set to `On` to install development files like headers and libraries (default is `Off`).
|
||||
- `LSFGVK_INSTALL_LIBRARIES`: Set to `On` to install development files like headers and libraries (default is `Off`).
|
||||
- `LSFGVK_INSTALL_XDG_FILES`: Set to `On` to install XDG desktop files and icons (default is `Off`).
|
||||
- `LSFGVK_LAYER_LIBRARY_PATH`: Override the path to the Vulkan layer library (by default, Vulkan will search the system's library path).
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ Regardless of the method you choose, the concept of profiles remains the same.
|
|||
### All Configuration Options
|
||||
|
||||
Below is a list of all available **global** configuration options:
|
||||
- **Path to Lossless Scaling / `dll`**: By default, lsfg-vk will search certain directories for Lossless Scaling. If you have Lossless Scaling installed in a custom location, you can specify the full path to the "Lossless.dll" file inside of Lossless Scaling here.
|
||||
- **Path to Lossless Scaling / `dll`**: By default, lsfg-vk will search certain directories for Lossless Scaling. If you have Lossless Scaling installed in a custom location, you can specify the full path to the "lsfg-vk.dll" file inside of Lossless Scaling here.
|
||||
- **Allow half-precision / `allow_fp16`**: If enabled, this will allow lsfg-vk to take advantage of half-precision shader operations if supported by the GPU. This has a giant performance uplift on AMD GPUs, but does not affect NVIDIA GPUs (GTX 1000-series or older cards will actually see a big performance **decrease**). This option **does not** influence quality. (Default: `true`)
|
||||
|
||||
Next is a list of all available **profile** configuration options:
|
||||
|
|
@ -18,12 +18,12 @@ Next is a list of all available **profile** configuration options:
|
|||
- **Active In / `active_in`**: A list of 1) Linux binary names, such as `mpv`, 2) Windows executables, such as `GenshinImpact.exe` and 3) process names, such as `GameThread`. It is also possible to specify the last part of a path (e.g. `Ghostrunner2/Binaries/Win64/Ghostrunner2-Win64-Shipping.exe`). When a process matching one of these rules is detected, this profile will be activated.
|
||||
- **Multiplier / `multiplier`**: The frame generation multiplier. A value of 3 means that for every frame rendered by the application, lsfg-vk will generate 2 additional frames. (Default: `2`)
|
||||
- **Flow Scale / `flow_scale`**: The resolution scale at which the motion vectors are calculated. A lower value means better performance, but worse quality. (Default: `1.0`)
|
||||
- **Performance Mode / `performance_mode`**: When enabled, a significantly lighter frame generation model is used. This has a minor quality impact, but greatly improves performance.
|
||||
- **Performance Mode / `performance_mode`**: When enabled, a significantly lighter frame generation model is used. This has a minor quality impact, but greatly improves performance.
|
||||
(Default: `false`)
|
||||
- **Pacing Mode / `pacing`**: This option is explained in greater detail below. Supported values are **None / `none`**.
|
||||
- **GPU / `gpu`**: The GPU to use for frame generation. This MUST be the **same GPU** as the one being used by the application. **Dual GPU is NOT supported**. You can identify a GPU through its name (e.g. `NVIDIA GeForce RTX 3080`), uppercase-only ID (e.g. `0x10DE:0x2C02`) or PCI bus ID (e.g. `3:0.0`). If not specified, the primary GPU will be used, which may lead to issues.
|
||||
|
||||
The "Multiplier", "Flow Scale" and "Performance Mode" options can be **hot-reloaded**, meaning that changes to these options will take effect immediately without needing to restart the application. Options such as "Pacing Mode" or removal of the profile require a swapchain recreation, which usually means resizing or restarting the application. Any other change requires an application restart.
|
||||
The "Multiplier", "Flow Scale" and "Performance Mode" options can be **hot-reloaded**, meaning that changes to these options will take effect immediately without needing to restart the application. Options such as "Pacing Mode" or removal of the profile require a swapchain recreation, which usually means resizing or restarting the application. Any other change requires an application restart.
|
||||
|
||||
### Pacing Modes
|
||||
|
||||
|
|
@ -45,7 +45,7 @@ The following environment variables affect lsfg-vk:
|
|||
- `LSFGVK_PROFILE`: Name of the profile to use. If set, this will override automatic profile detection.
|
||||
|
||||
If you do not wish to use a configuration file, you can also set configuration options through environment variables. To do this, set `LSFGVK_ENV=1` and then any of the following variables:
|
||||
- `LSFGVK_DLL_PATH`: Path to Lossless Scaling DLL.
|
||||
- `LSFGVK_DLL_PATH`: Path to lsfg-vk DLL.
|
||||
- `LSFGVK_NO_FP16`: If set to `1`, half-precision will be disabled.
|
||||
- `LSFGVK_MULTIPLIER`: Frame generation multiplier.
|
||||
- `LSFGVK_FLOW_SCALE`: Flow scale value.
|
||||
|
|
|
|||
|
|
@ -23,8 +23,3 @@ Checks:
|
|||
- -cppcoreguidelines-macro-usage
|
||||
- -bugprone-easily-swappable-parameters
|
||||
- -portability-avoid-pragma-once
|
||||
# Vulkan requires the use of reinterpret/const casts in many places
|
||||
- -cppcoreguidelines-pro-type-reinterpret-cast
|
||||
- -cppcoreguidelines-pro-type-const-cast
|
||||
# We use namespace forward declarations
|
||||
- -bugprone-forward-declaration-namespace
|
||||
|
|
|
|||
|
|
@ -1,33 +1,23 @@
|
|||
set(BACKEND_SOURCES
|
||||
"src/extraction/dll_reader.cpp"
|
||||
"src/extraction/shader_registry.cpp"
|
||||
"src/helpers/limits.cpp"
|
||||
"src/helpers/managed_shader.cpp"
|
||||
"src/helpers/utils.cpp"
|
||||
"src/shaderchains/alpha0.cpp"
|
||||
"src/shaderchains/alpha1.cpp"
|
||||
"src/shaderchains/beta0.cpp"
|
||||
"src/shaderchains/beta1.cpp"
|
||||
"src/shaderchains/delta0.cpp"
|
||||
"src/shaderchains/delta1.cpp"
|
||||
"src/shaderchains/gamma0.cpp"
|
||||
"src/shaderchains/gamma1.cpp"
|
||||
"src/shaderchains/generate.cpp"
|
||||
"src/shaderchains/mipmaps.cpp"
|
||||
"src/modules/library/dll.cpp"
|
||||
"src/modules/library.cpp"
|
||||
"src/modules/pipeline.cpp"
|
||||
"src/utility/pipelines.cpp"
|
||||
"src/utility/vkhelper.cpp"
|
||||
"src/lsfgvk.cpp")
|
||||
|
||||
add_library(lsfg-vk-backend STATIC ${BACKEND_SOURCES})
|
||||
|
||||
target_include_directories(lsfg-vk-backend
|
||||
PUBLIC include)
|
||||
PUBLIC include
|
||||
PRIVATE src)
|
||||
|
||||
target_link_libraries(lsfg-vk-backend
|
||||
PUBLIC lsfg-vk-common)
|
||||
target_compile_options(lsfg-vk-backend PUBLIC
|
||||
$<$<CXX_COMPILER_ID:Clang>:-fconstexpr-steps=4290000000>
|
||||
$<$<CXX_COMPILER_ID:GNU>:-fconstexpr-ops-limit=4290000000>
|
||||
)
|
||||
|
||||
set_target_properties(lsfg-vk-backend PROPERTIES
|
||||
CXX_VISIBILITY_PRESET hidden)
|
||||
|
||||
if(LSFGVK_INSTALL_DEVELOP)
|
||||
if(LSFGVK_INSTALL_LIBRARIES)
|
||||
install(TARGETS lsfg-vk-backend
|
||||
ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")
|
||||
install(DIRECTORY "include/lsfg-vk-backend/"
|
||||
|
|
|
|||
|
|
@ -1,143 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <exception>
|
||||
#include <filesystem>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace lsfgvk::backend {
|
||||
|
||||
class [[gnu::visibility("default")]] ContextImpl;
|
||||
class [[gnu::visibility("default")]] InstanceImpl;
|
||||
|
||||
using Context = ContextImpl;
|
||||
|
||||
///
|
||||
/// Primitive exception class that delivers a detailed error message
|
||||
///
|
||||
class [[gnu::visibility("default")]] error : public std::runtime_error {
|
||||
public:
|
||||
///
|
||||
/// Construct an error
|
||||
///
|
||||
/// @param msg Error message.
|
||||
/// @param inner Inner exception.
|
||||
///
|
||||
explicit error(const std::string &msg, const std::exception &inner);
|
||||
|
||||
///
|
||||
/// Construct an error
|
||||
///
|
||||
/// @param msg Error message.
|
||||
///
|
||||
explicit error(const std::string &msg);
|
||||
|
||||
error(const error &) = default;
|
||||
error &operator=(const error &) = default;
|
||||
error(error &&) = default;
|
||||
error &operator=(error &&) = default;
|
||||
~error() override;
|
||||
};
|
||||
|
||||
/// Function type for picking a device based on its name and IDs
|
||||
using DevicePicker = std::function<bool(
|
||||
const std::string& deviceName,
|
||||
std::pair<const std::string&, const std::string&> ids, // (vendor ID, device ID) 0xXXXX format
|
||||
const std::optional<std::string>& pci // (bus:slot.func) if available, no padded zeros
|
||||
)>;
|
||||
|
||||
///
|
||||
/// Main entry point of the library
|
||||
///
|
||||
class [[gnu::visibility("default")]] Instance {
|
||||
public:
|
||||
///
|
||||
/// Create a lsfg-vk instance
|
||||
///
|
||||
/// @param devicePicker Function that picks a physical device based on some identifiers.
|
||||
/// @param shaderDllPath Path to the Lossless.dll file to load shaders from.
|
||||
/// @param allowLowPrecision Whether to load low-precision (FP16) shaders if supported.
|
||||
///
|
||||
/// @throws backend::error on failure
|
||||
///
|
||||
Instance(
|
||||
const DevicePicker& devicePicker,
|
||||
const std::filesystem::path& shaderDllPath,
|
||||
bool allowLowPrecision
|
||||
);
|
||||
|
||||
///
|
||||
/// Open a frame generation context.
|
||||
///
|
||||
/// The VkFormat of the exchanged images is inferred from whether hdr is true or false:
|
||||
/// - false: VK_FORMAT_R8G8B8A8_UNORM
|
||||
/// - true: VK_FORMAT_R16G16B16A16_SFLOAT
|
||||
///
|
||||
/// The application and library must keep track of the frame index. When the next frame
|
||||
/// is ready, signal the syncFd with one increment (with the first trigger being 1).
|
||||
/// Each generated frame will increment the semaphore by one:
|
||||
/// - Application signals 1 -> Start generating with (curr, next) source images
|
||||
/// - Library signals 1 -> First frame between (curr, next) is ready
|
||||
/// - Library signals N -> N-th frame between (curr, next) is ready
|
||||
/// - Application signals N+1 -> Start generating with (next, curr) source images
|
||||
///
|
||||
/// @param sourceFds Pair of file descriptors for the source images alternated between.
|
||||
/// @param destFds Vector with file descriptors to import output images from.
|
||||
/// @param syncFd File descriptor for the timeline semaphore used for synchronization.
|
||||
/// @param width Width of the images.
|
||||
/// @param height Height of the images.
|
||||
/// @param hdr Whether the images are HDR.
|
||||
/// @param flow Motion flow factor.
|
||||
/// @param perf Whether to enable performance mode.
|
||||
///
|
||||
/// @throws backend::error on failure
|
||||
///
|
||||
Context& openContext(
|
||||
std::pair<int, int> sourceFds,
|
||||
const std::vector<int>& destFds,
|
||||
int syncFd,
|
||||
uint32_t width, uint32_t height,
|
||||
bool hdr, float flow, bool perf
|
||||
);
|
||||
|
||||
///
|
||||
/// Schedule a new set of generated frames.
|
||||
///
|
||||
/// @param context Context to use.
|
||||
/// @throws backend::error on failure
|
||||
///
|
||||
void scheduleFrames(Context& context);
|
||||
|
||||
///
|
||||
/// Close a frame generation context
|
||||
///
|
||||
/// @param context Context to close.
|
||||
///
|
||||
void closeContext(const Context& context);
|
||||
|
||||
// Non-copyable and non-movable
|
||||
Instance(const Instance&) = delete;
|
||||
Instance& operator=(const Instance&) = delete;
|
||||
Instance(Instance&&) = delete;
|
||||
Instance& operator=(Instance&&) = delete;
|
||||
virtual ~Instance();
|
||||
private:
|
||||
std::unique_ptr<InstanceImpl> m_impl;
|
||||
|
||||
std::vector<std::unique_ptr<Context>> m_contexts;
|
||||
};
|
||||
|
||||
///
|
||||
/// Make all lsfg-vk instances leaking.
|
||||
/// This is to work around a bug in the Vulkan loader, which
|
||||
/// makes it impossible to destroy Vulkan instances and devices.
|
||||
///
|
||||
void makeLeaking();
|
||||
|
||||
}
|
||||
152
lsfg-vk-backend/include/lsfg-vk/lsfgvk.hpp
Normal file
152
lsfg-vk-backend/include/lsfg-vk/lsfgvk.hpp
Normal file
|
|
@ -0,0 +1,152 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <filesystem>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#ifdef LSFGVK_PRIV
|
||||
#include <vulkan/vulkan_core.h>
|
||||
#endif // LSFGVK_PRIV
|
||||
|
||||
namespace lsfgvk {
|
||||
|
||||
/// Forward declaration of implementation classes
|
||||
namespace priv {
|
||||
struct [[gnu::visibility("default")]] Instance;
|
||||
struct [[gnu::visibility("default")]] Context;
|
||||
}
|
||||
|
||||
///
|
||||
/// Main entrypoint of the library
|
||||
///
|
||||
class [[gnu::visibility("default")]] Instance {
|
||||
friend class Context;
|
||||
public:
|
||||
///
|
||||
/// Create a lsfg-vk instance
|
||||
///
|
||||
/// The device identifier may be one of:
|
||||
/// - Device name (e.g. "NVIDIA GeForce RTX 5080")
|
||||
/// - Vendor ID + Device ID in lowercase hexadecimal (e.g. "10de:2c02")
|
||||
/// - PCI bus ID with padded zeroes (e.g. "0000:01:00.0")
|
||||
///
|
||||
/// @param deviceId Device identifier (see above)
|
||||
/// @param lsfgvkDllPath Path to the lsfg-vk DLL file
|
||||
/// @param allowFP16 Whether to allow usage of fp16 shader variants
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
Instance(
|
||||
const std::string& deviceId,
|
||||
const std::filesystem::path& lsfgvkDllPath,
|
||||
bool allowFP16
|
||||
);
|
||||
|
||||
#ifdef LSFGVK_PRIV
|
||||
/// Get the underlying Vulkan instance handle
|
||||
/// @return Vulkan instance
|
||||
[[nodiscard]] VkInstance _instance() const;
|
||||
|
||||
/// Get the underlying Vulkan device handle
|
||||
/// @return Vulkan device
|
||||
[[nodiscard]] VkDevice _device() const;
|
||||
#endif // LSFGVK_PRIV
|
||||
|
||||
// Non-copyable, non-movable
|
||||
Instance(const Instance&) = delete;
|
||||
Instance& operator=(const Instance&) = delete;
|
||||
Instance(Instance&&) = delete;
|
||||
Instance& operator=(Instance&&) = delete;
|
||||
~Instance();
|
||||
private:
|
||||
std::unique_ptr<priv::Instance> m_priv;
|
||||
};
|
||||
|
||||
///
|
||||
/// File descriptors exported from a context, the user must close them after use.
|
||||
///
|
||||
struct FileDescriptors {
|
||||
///
|
||||
/// File descriptor for a Vulkan memory allocation containing
|
||||
/// a 2D array of RGBA8 pixels with length 2 and optimal allocation.
|
||||
///
|
||||
/// Starting at iteration 0, the next frame for which frames should be interpolated
|
||||
/// in between should be placed in image `iteration % 2`.
|
||||
///
|
||||
int sourceFd;
|
||||
|
||||
///
|
||||
/// File descriptor for a Vulkan memory allocation containing a single RGBA8
|
||||
/// image into which each generated frame will be written.
|
||||
///
|
||||
int destinationFd;
|
||||
|
||||
///
|
||||
/// File descriptor for a timeline semaphore. When scheduling frames for generation,
|
||||
/// a specific value is waited for and signaled on return. It is up to the user to ensure
|
||||
/// the destination image is not overwritten before it is read.
|
||||
///
|
||||
int syncFd;
|
||||
};
|
||||
|
||||
/// A context for generating frames
|
||||
///
|
||||
class [[gnu::visibility("default")]] Context {
|
||||
public:
|
||||
///
|
||||
/// Create a frame generation context
|
||||
///
|
||||
/// @param instance Parent instance
|
||||
/// @param width Image width
|
||||
/// @param height Image height
|
||||
/// @param flowScale Flow estimation scale factor
|
||||
/// @param performanceMode Whether to enable performance mode
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
Context(
|
||||
const Instance& instance,
|
||||
uint32_t width,
|
||||
uint32_t height,
|
||||
float flowScale,
|
||||
bool performanceMode
|
||||
);
|
||||
|
||||
///
|
||||
/// Export the internal resources
|
||||
///
|
||||
/// @return File descriptors for internal resources
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
[[nodiscard]] FileDescriptors exportFds() const;
|
||||
|
||||
///
|
||||
/// Dispatch frame generation
|
||||
///
|
||||
/// Let `so - 1` be the current value of the timeline semaphore, starting at 0.
|
||||
/// The user must signal `so` to start the generation of the next frame, after
|
||||
/// which lsfg-vk will signal `so + 1`. The user must ensure the previously
|
||||
/// generated frame is read before signaling the next one (at `so + 2` and so on).
|
||||
///
|
||||
/// @param total Total number of frames to generate
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
void dispatch(uint32_t total);
|
||||
|
||||
///
|
||||
/// Wait for the device to be idle
|
||||
///
|
||||
void idle() const;
|
||||
|
||||
// Non-copyable, non-movable
|
||||
Context(const Context&) = delete;
|
||||
Context& operator=(const Context&) = delete;
|
||||
Context(Context&&) = delete;
|
||||
Context& operator=(Context&&) = delete;
|
||||
~Context();
|
||||
private:
|
||||
std::unique_ptr<priv::Context> m_priv;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <filesystem>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace lsfgvk::backend {
|
||||
|
||||
/// extract all resources from a DLL file
|
||||
/// @param dll path to the DLL file
|
||||
/// @return map of resource IDs to their binary data
|
||||
/// @throws ls::error on various failure points
|
||||
std::unordered_map<uint32_t, std::vector<uint8_t>> extractResourcesFromDLL(
|
||||
const std::filesystem::path& dll);
|
||||
|
||||
}
|
||||
|
|
@ -1,171 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "shader_registry.hpp"
|
||||
#include "lsfg-vk-common/helpers/errors.hpp"
|
||||
#include "lsfg-vk-common/vulkan/shader.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <span>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
using namespace lsfgvk;
|
||||
using namespace lsfgvk::backend;
|
||||
|
||||
namespace {
|
||||
/// get the source code for a shader
|
||||
const std::vector<uint8_t>& getShaderSource(uint32_t id, bool fp16, bool perf,
|
||||
const std::unordered_map<uint32_t, std::vector<uint8_t>>& resources) {
|
||||
const size_t BASE_OFFSET = 49;
|
||||
const size_t OFFSET_PERF = 23;
|
||||
const size_t OFFSET_FP32 = 49;
|
||||
|
||||
auto it = resources.find(BASE_OFFSET + id +
|
||||
(perf ? OFFSET_PERF : 0) +
|
||||
(fp16 ? 0 : OFFSET_FP32));
|
||||
if (it == resources.end())
|
||||
throw ls::error("unable to find shader with id: " + std::to_string(id));
|
||||
|
||||
return it->second;
|
||||
}
|
||||
/// Patch the SPIR-V binary of the generate shader in place.
///
/// The instruction stream following the five-word module header is walked and
/// two rewrites are applied:
/// - an OpCapability whose operand is StorageImageWriteWithoutFormat is
///   changed to the Shader capability
/// - every OpTypeImage whose "sampled" operand equals 2 gets its image format
///   operand set to Rgba16f (hdr) or Rgba8 (otherwise)
///
/// Walking stops at the first instruction whose declared word count runs past
/// the end of the buffer, so a truncated module is never accessed out of bounds.
///
/// @param data shader binary, interpreted as host-endian 32-bit words; trailing
///             bytes that do not form a complete word are ignored
/// @param hdr whether to select the 16-bit float image format
void patchGenerateShader(std::vector<uint8_t>& data, bool hdr) {
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunknown-warning-option"
#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
    auto* const words = reinterpret_cast<uint32_t*>(data.data());
    const size_t wordCount = data.size() / sizeof(uint32_t);

    const size_t HeaderWords = 5;
    const uint16_t SpvOpCapability = 17;
    const uint16_t SpvOpTypeImage = 25;
    const uint32_t SpvCapabilityStorageImageWriteWithoutFormat = 56;
    const uint32_t SpvCapabilityShader = 1;
    const uint32_t SpvImageFormatRgba16f = 2;
    const uint32_t SpvImageFormatRgba8 = 4;

    for (size_t i = HeaderWords; i < wordCount;) {
        const uint32_t head = words[i]; // NOLINT ([]-usage)
        const auto wc = static_cast<uint16_t>(head >> 16);
        const auto op = static_cast<uint16_t>(head & 0xFFFF);

        // A word count of zero is invalid; advance by one word to guarantee progress
        const size_t length = (wc != 0) ? size_t{wc} : size_t{1};

        // Never touch operands of an instruction that does not fit into the buffer
        if (length > wordCount - i)
            break;

        // remove write without format capability
        if (op == SpvOpCapability && wc >= 2) {
            uint32_t& cap = words[i + 1]; // NOLINT ([]-usage)
            if (cap == SpvCapabilityStorageImageWriteWithoutFormat)
                cap = SpvCapabilityShader;
        }

        // patch format in image instructions
        if (op == SpvOpTypeImage && wc >= 9) {
            const uint32_t sampled = words[i + 7]; // NOLINT ([]-usage)
            if (sampled == 2)
                words[i + 8] = // NOLINT ([]-usage)
                    hdr ? SpvImageFormatRgba16f : SpvImageFormatRgba8;
        }

        i += length;
    }
#pragma clang diagnostic pop
}
|
||||
}
|
||||
|
||||
/// Build the registry of all shaders from the extracted resources.
///
/// The generate shader (id 256) is copied twice and patched once for each of
/// the non-HDR and HDR variants; all other shaders are loaded unmodified, once
/// for the quality set and once for the performance set.
///
/// @param vk Vulkan instance
/// @param fp16 whether to load fp16 variants
/// @param resources map of resource IDs to their binary data
/// @return constructed shader registry
ShaderRegistry backend::buildShaderRegistry(const vk::Vulkan& vk, bool fp16,
        const std::unordered_map<uint32_t, std::vector<uint8_t>>& resources) {
    // Two private copies of the generate shader, one per output format
    const std::vector<uint8_t>& pristineGenerate = getShaderSource(256, fp16, false, resources);
    std::vector<uint8_t> sdrGenerate(pristineGenerate);
    std::vector<uint8_t> hdrGenerate(pristineGenerate);
    patchGenerateShader(sdrGenerate, false);
    patchGenerateShader(hdrGenerate, true);

    // Loaders for the unpatched shaders of the quality / performance sets
#define QUALITY_SHADER(id, p1, p2, p3, p4) \
    vk::Shader(vk, getShaderSource(id, fp16, false, resources), \
        p1, p2, p3, p4)
#define PERFORMANCE_SHADER(id, p1, p2, p3, p4) \
    vk::Shader(vk, getShaderSource(id, fp16, true, resources), \
        p1, p2, p3, p4)

    return {
        .mipmaps = QUALITY_SHADER(255, 1, 7, 1, 1),
        .generate = vk::Shader(vk, sdrGenerate, 5, 1, 1, 2),
        .generate_hdr = vk::Shader(vk, hdrGenerate, 5, 1, 1, 2),
        .quality = {
            .alpha = {
                QUALITY_SHADER(267, 1, 2, 0, 1),
                QUALITY_SHADER(268, 2, 2, 0, 1),
                QUALITY_SHADER(269, 2, 4, 0, 1),
                QUALITY_SHADER(270, 4, 4, 0, 1)
            },
            .beta = {
                QUALITY_SHADER(275, 12, 2, 0, 1),
                QUALITY_SHADER(276, 2, 2, 0, 1),
                QUALITY_SHADER(277, 2, 2, 0, 1),
                QUALITY_SHADER(278, 2, 2, 0, 1),
                QUALITY_SHADER(279, 2, 6, 1, 1)
            },
            .gamma = {
                QUALITY_SHADER(257, 9, 3, 1, 2),
                QUALITY_SHADER(259, 3, 4, 0, 1),
                QUALITY_SHADER(260, 4, 4, 0, 1),
                QUALITY_SHADER(261, 4, 4, 0, 1),
                QUALITY_SHADER(262, 6, 1, 1, 2)
            },
            .delta = {
                QUALITY_SHADER(257, 9, 3, 1, 2),
                QUALITY_SHADER(263, 3, 4, 0, 1),
                QUALITY_SHADER(264, 4, 4, 0, 1),
                QUALITY_SHADER(265, 4, 4, 0, 1),
                QUALITY_SHADER(266, 6, 1, 1, 2),
                QUALITY_SHADER(258, 10, 2, 1, 2),
                QUALITY_SHADER(271, 2, 2, 0, 1),
                QUALITY_SHADER(272, 2, 2, 0, 1),
                QUALITY_SHADER(273, 2, 2, 0, 1),
                QUALITY_SHADER(274, 3, 1, 1, 2)
            }
        },
        .performance = {
            .alpha = {
                PERFORMANCE_SHADER(267, 1, 1, 0, 1),
                PERFORMANCE_SHADER(268, 1, 1, 0, 1),
                PERFORMANCE_SHADER(269, 1, 2, 0, 1),
                PERFORMANCE_SHADER(270, 2, 2, 0, 1)
            },
            .beta = {
                PERFORMANCE_SHADER(275, 6, 2, 0, 1),
                PERFORMANCE_SHADER(276, 2, 2, 0, 1),
                PERFORMANCE_SHADER(277, 2, 2, 0, 1),
                PERFORMANCE_SHADER(278, 2, 2, 0, 1),
                PERFORMANCE_SHADER(279, 2, 6, 1, 1)
            },
            .gamma = {
                PERFORMANCE_SHADER(257, 5, 3, 1, 2),
                PERFORMANCE_SHADER(259, 3, 2, 0, 1),
                PERFORMANCE_SHADER(260, 2, 2, 0, 1),
                PERFORMANCE_SHADER(261, 2, 2, 0, 1),
                PERFORMANCE_SHADER(262, 4, 1, 1, 2)
            },
            .delta = {
                PERFORMANCE_SHADER(257, 5, 3, 1, 2),
                PERFORMANCE_SHADER(263, 3, 2, 0, 1),
                PERFORMANCE_SHADER(264, 2, 2, 0, 1),
                PERFORMANCE_SHADER(265, 2, 2, 0, 1),
                PERFORMANCE_SHADER(266, 4, 1, 1, 2),
                PERFORMANCE_SHADER(258, 6, 1, 1, 2),
                PERFORMANCE_SHADER(271, 1, 1, 0, 1),
                PERFORMANCE_SHADER(272, 1, 1, 0, 1),
                PERFORMANCE_SHADER(273, 1, 1, 0, 1),
                PERFORMANCE_SHADER(274, 2, 1, 1, 2)
            }
        },
        .is_fp16 = fp16
    };

#undef PERFORMANCE_SHADER
#undef QUALITY_SHADER
}
|
||||
|
|
@ -1,42 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lsfg-vk-common/vulkan/shader.hpp"
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace lsfgvk::backend {
|
||||
|
||||
/// shader collection struct
|
||||
struct Shaders {
|
||||
std::array<vk::Shader, 4> alpha;
|
||||
std::array<vk::Shader, 5> beta;
|
||||
std::array<vk::Shader, 5> gamma;
|
||||
std::array<vk::Shader, 10> delta;
|
||||
};
|
||||
|
||||
/// shader registry struct
|
||||
struct ShaderRegistry {
|
||||
vk::Shader mipmaps;
|
||||
vk::Shader generate, generate_hdr;
|
||||
Shaders quality;
|
||||
Shaders performance;
|
||||
|
||||
bool is_fp16; //!< whether the fp16 shader variants were loaded
|
||||
};
|
||||
|
||||
/// build a shader registry from resources
|
||||
/// @param vk Vulkan instance
|
||||
/// @param fp16 whether to load fp16 variants
|
||||
/// @param resources map of resource IDs to their binary data
|
||||
/// @return constructed shader registry
|
||||
/// @throws ls::error if shaders are missing
|
||||
/// @throws vk::vulkan_error on Vulkan errors
|
||||
ShaderRegistry buildShaderRegistry(const vk::Vulkan& vk, bool fp16,
|
||||
const std::unordered_map<uint32_t, std::vector<uint8_t>>& resources);
|
||||
|
||||
}
|
||||
|
|
@ -1,56 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "limits.hpp"
|
||||
|
||||
#include "lsfg-vk-common/vulkan/descriptor_pool.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
using namespace lsfgvk;
|
||||
using namespace lsfgvk::backend;
|
||||
|
||||
namespace {
|
||||
const vk::Limits BASE_LIMITS{
|
||||
.sets = 51,
|
||||
.uniform_buffers = 3,
|
||||
.samplers = 51,
|
||||
.sampled_images = 165,
|
||||
.storage_images = 172
|
||||
};
|
||||
const vk::Limits BASE_LIMITS_PERF{
|
||||
.sampled_images = 91,
|
||||
.storage_images = 102
|
||||
};
|
||||
const vk::Limits GEN_LIMITS{
|
||||
.sets = 93,
|
||||
.uniform_buffers = 54,
|
||||
.samplers = 147,
|
||||
.sampled_images = 567,
|
||||
.storage_images = 261
|
||||
};
|
||||
const vk::Limits GEN_LIMITS_PERF{
|
||||
.sampled_images = 339,
|
||||
.storage_images = 183
|
||||
};
|
||||
}
|
||||
|
||||
/// Calculate the descriptor pool limits for a given number of images.
///
/// The result is the base limits plus `count` times the per-image limits. In
/// performance mode the sampled/storage image figures of both are replaced by
/// their performance-mode counterparts first.
///
/// @param count number of images (NOTE: narrowed to 16 bits before multiplying)
/// @param perf whether performance mode is enabled
/// @return calculated limits
vk::Limits backend::calculateDescriptorPoolLimits(size_t count, bool perf) {
    vk::Limits total{BASE_LIMITS};
    vk::Limits perImage{GEN_LIMITS};

    // Performance mode only differs in the number of image descriptors
    if (perf) {
        total.sampled_images = BASE_LIMITS_PERF.sampled_images;
        total.storage_images = BASE_LIMITS_PERF.storage_images;
        perImage.sampled_images = GEN_LIMITS_PERF.sampled_images;
        perImage.storage_images = GEN_LIMITS_PERF.storage_images;
    }

    const auto factor = static_cast<uint16_t>(count);
    total.sets += perImage.sets * factor;
    total.uniform_buffers += perImage.uniform_buffers * factor;
    total.samplers += perImage.samplers * factor;
    total.sampled_images += perImage.sampled_images * factor;
    total.storage_images += perImage.storage_images * factor;
    return total;
}
|
||||
|
|
@ -1,15 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lsfg-vk-common/vulkan/descriptor_pool.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
namespace lsfgvk::backend {
|
||||
/// calculate limits for descriptor pools
|
||||
/// @param count number of images
|
||||
/// @param perf whether performance mode is enabled
|
||||
/// @return calculated limits
|
||||
vk::Limits calculateDescriptorPoolLimits(size_t count, bool perf);
|
||||
}
|
||||
|
|
@ -1,128 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "managed_shader.hpp"
|
||||
#include "lsfg-vk-common/vulkan/buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/descriptor_pool.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/sampler.hpp"
|
||||
#include "lsfg-vk-common/vulkan/shader.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <functional>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
using namespace lsfgvk;
|
||||
using namespace lsfgvk::backend;
|
||||
|
||||
/// Append a single image to the list of sampled images.
/// Only a reference is stored, the image itself is not copied.
/// @param image image to reference
/// @return this builder, to allow chaining
ManagedShaderBuilder& ManagedShaderBuilder::sampled(const vk::Image& image) {
    sampledImages.push_back(std::cref(image));
    return *this;
}
|
||||
|
||||
/// Append a range of images to the list of sampled images.
///
/// @param images vector to take the images from (only references are stored)
/// @param offset index of the first image to append
/// @param count number of images to append; 0, or a count reaching past the
///              end of `images`, selects everything from `offset` to the end
/// @return this builder, to allow chaining
/// @throws std::out_of_range (from std::vector::at) if `offset` is greater
///         than `images.size()`
ManagedShaderBuilder& ManagedShaderBuilder::sampleds(
        const std::vector<vk::Image>& images,
        size_t offset, size_t count) {
    // Compare against the remaining length rather than computing
    // `offset + count`, which can wrap around for very large counts and
    // would then skip the clamp.
    const size_t remaining = images.size() - offset;
    if (count == 0 || count > remaining)
        count = remaining;

    for (size_t i = 0; i < count; ++i)
        this->sampledImages.push_back(std::ref(images.at(offset + i)));
    return *this;
}
|
||||
|
||||
|
||||
/// Append a single image to the list of storage images.
/// Only a reference is stored, the image itself is not copied.
/// @param image image to reference
/// @return this builder, to allow chaining
ManagedShaderBuilder& ManagedShaderBuilder::storage(const vk::Image& image) {
    storageImages.push_back(std::cref(image));
    return *this;
}
|
||||
|
||||
/// Append a range of images to the list of storage images.
///
/// @param images vector to take the images from (only references are stored)
/// @param offset index of the first image to append
/// @param count number of images to append; 0, or a count reaching past the
///              end of `images`, selects everything from `offset` to the end
/// @return this builder, to allow chaining
/// @throws std::out_of_range (from std::vector::at) if `offset` is greater
///         than `images.size()`
ManagedShaderBuilder& ManagedShaderBuilder::storages(
        const std::vector<vk::Image>& images,
        size_t offset, size_t count) {
    // Compare against the remaining length rather than computing
    // `offset + count`, which can wrap around for very large counts and
    // would then skip the clamp.
    const size_t remaining = images.size() - offset;
    if (count == 0 || count > remaining)
        count = remaining;

    for (size_t i = 0; i < count; ++i)
        this->storageImages.push_back(std::ref(images.at(offset + i)));
    return *this;
}
|
||||
|
||||
/// Append a single sampler to the list of image samplers.
/// Only a reference is stored, the sampler itself is not copied.
/// @param sampler sampler to reference
/// @return this builder, to allow chaining
ManagedShaderBuilder& ManagedShaderBuilder::sampler(const vk::Sampler& sampler) {
    imageSamplers.push_back(std::cref(sampler));
    return *this;
}
|
||||
|
||||
/// Append every sampler of a vector to the list of image samplers, in order.
/// Only references are stored, the samplers themselves are not copied.
/// @param samplers samplers to reference
/// @return this builder, to allow chaining
ManagedShaderBuilder& ManagedShaderBuilder::samplers(
        const std::vector<vk::Sampler>& samplers) {
    for (const auto& entry : samplers) {
        imageSamplers.push_back(std::cref(entry));
    }
    return *this;
}
|
||||
|
||||
/// Append a single buffer to the list of constant buffers.
/// Only a reference is stored, the buffer itself is not copied.
/// @param buffer buffer to reference
/// @return this builder, to allow chaining
ManagedShaderBuilder& ManagedShaderBuilder::buffer(const vk::Buffer& buffer) {
    constantBuffers.push_back(std::cref(buffer));
    return *this;
}
|
||||
|
||||
/// Build a managed shader from the resources collected so far.
///
/// One image memory barrier is prepared per collected image (sampled images
/// first, then storage images) and a descriptor set is created that binds all
/// collected images, samplers and buffers.
///
/// @param vk Vulkan instance
/// @param pool descriptor pool handed to the descriptor set
/// @param shader shader to reference from the managed shader
/// @return managed shader holding the shader reference, barriers and descriptor set
ManagedShader ManagedShaderBuilder::build(const vk::Vulkan& vk,
        const vk::DescriptorPool& pool, const vk::Shader& shader) const {
    // All barriers keep the image in GENERAL layout on the same queue family
    // and cover one mip level / one layer of the color aspect; only the
    // access masks differ between sampled and storage images.
    const auto barrierFor = [](const auto& image,
            VkAccessFlags before, VkAccessFlags after) -> vk::Barrier {
        return {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .srcAccessMask = before,
            .dstAccessMask = after,
            .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .image = image.get().handle(),
            .subresourceRange = {
                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                .levelCount = 1,
                .layerCount = 1
            }
        };
    };

    std::vector<vk::Barrier> barriers;
    barriers.reserve(this->storageImages.size() + this->sampledImages.size());

    // sampled images: earlier shader writes -> shader reads
    for (const auto& image : this->sampledImages)
        barriers.push_back(barrierFor(image,
            VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT));

    // storage images: earlier shader reads -> shader writes
    for (const auto& image : this->storageImages)
        barriers.push_back(barrierFor(image,
            VK_ACCESS_SHADER_READ_BIT, VK_ACCESS_SHADER_WRITE_BIT));

    return {
        std::ref(shader),
        std::move(barriers),
        vk::DescriptorSet(vk, pool, shader,
            this->sampledImages,
            this->storageImages,
            this->imageSamplers,
            this->constantBuffers)
    };
}
|
||||
|
||||
/// dispatch the shader with its descriptor set and barriers
/// @param vk the vulkan instance
/// @param cmd command buffer to use
/// @param extent dispatch size; width and height are passed on as x and y, z is 1
void ManagedShader::dispatch(const vk::Vulkan& vk, const vk::CommandBuffer& cmd,
        VkExtent2D extent) const {
    const auto sizeX = extent.width;
    const auto sizeY = extent.height;

    cmd.dispatch(vk, this->shader,
        this->descriptorSet,
        this->barriers,
        sizeX, sizeY, 1
    );
}
|
||||
|
|
@ -1,98 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "lsfg-vk-common/helpers/pointers.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/descriptor_pool.hpp"
|
||||
#include "lsfg-vk-common/vulkan/descriptor_set.hpp"
|
||||
#include "lsfg-vk-common/vulkan/shader.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
namespace lsfgvk::backend {
|
||||
|
||||
/// managed shader handling dispatch and barriers
|
||||
/// this class is NOT memory-safe
|
||||
class ManagedShader {
|
||||
friend class ManagedShaderBuilder;
|
||||
public:
|
||||
/// dispatch the managed shader
|
||||
/// @param vk the vulkan instance
|
||||
/// @param cmd command buffer to use
|
||||
/// @param extent dispatch size
|
||||
/// @throws ls::vulkan_error on failure
|
||||
void dispatch(const vk::Vulkan& vk,
|
||||
const vk::CommandBuffer& cmd, VkExtent2D extent) const;
|
||||
private:
|
||||
ls::R<const vk::Shader> shader;
|
||||
|
||||
std::vector<vk::Barrier> barriers;
|
||||
vk::DescriptorSet descriptorSet;
|
||||
|
||||
// simple move constructor
|
||||
ManagedShader(ls::R<const vk::Shader> shader,
|
||||
std::vector<vk::Barrier> barriers,
|
||||
vk::DescriptorSet descriptorSet) :
|
||||
shader(shader),
|
||||
barriers(std::move(barriers)),
|
||||
descriptorSet(std::move(descriptorSet)) {
|
||||
}
|
||||
};
|
||||
|
||||
/// class for building managed shaders
|
||||
/// this class is NOT memory-safe
|
||||
class ManagedShaderBuilder {
|
||||
public:
|
||||
/// default constructor
|
||||
ManagedShaderBuilder() = default;
|
||||
|
||||
/// add a sampled image
|
||||
/// @param image image to add
|
||||
[[nodiscard]] ManagedShaderBuilder& sampled(const vk::Image& image);
|
||||
/// add multiple sampled images
|
||||
/// @param images images to add
|
||||
/// @param offset offset into images
|
||||
/// @param count number of images to add (0 = all)
|
||||
[[nodiscard]] ManagedShaderBuilder& sampleds(const std::vector<vk::Image>& images,
|
||||
size_t offset = 0, size_t count = 0);
|
||||
|
||||
/// add a storage image
|
||||
/// @param image image to add
|
||||
[[nodiscard]] ManagedShaderBuilder& storage(const vk::Image& image);
|
||||
/// add multiple storage images
|
||||
/// @param images images to add
|
||||
/// @param offset offset into images
|
||||
/// @param count number of images to add (0 = all)
|
||||
[[nodiscard]] ManagedShaderBuilder& storages(const std::vector<vk::Image>& images,
|
||||
size_t offset = 0, size_t count = 0);
|
||||
|
||||
/// add a sampler
|
||||
/// @param sampler sampler to add
|
||||
[[nodiscard]] ManagedShaderBuilder& sampler(const vk::Sampler& sampler);
|
||||
/// add multiple samplers
|
||||
/// @param samplers samplers to add
|
||||
[[nodiscard]] ManagedShaderBuilder& samplers(const std::vector<vk::Sampler>& samplers);
|
||||
|
||||
/// add a buffer
|
||||
/// @param buffer buffer to add
|
||||
[[nodiscard]] ManagedShaderBuilder& buffer(const vk::Buffer& buffer);
|
||||
|
||||
/// build the managed shader
|
||||
/// @param vk the vulkan instance
|
||||
/// @param pool the descriptor pool to use
|
||||
/// @param shader the shader to use
|
||||
/// @returns the built managed shader
|
||||
[[nodiscard]] ManagedShader build(const vk::Vulkan& vk,
|
||||
const vk::DescriptorPool& pool, const vk::Shader& shader) const;
|
||||
private:
|
||||
std::vector<ls::R<const vk::Image>> sampledImages;
|
||||
std::vector<ls::R<const vk::Image>> storageImages;
|
||||
std::vector<ls::R<const vk::Sampler>> imageSamplers;
|
||||
std::vector<ls::R<const vk::Buffer>> constantBuffers;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
@ -1,50 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "utils.hpp"
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
using namespace lsfgvk;
|
||||
using namespace lsfgvk::backend;
|
||||
|
||||
/// build a constant buffer with the default values for one timestamp
/// @param index timestamp index
/// @param total total amount of images
/// @param hdr whether HDR is enabled
/// @param invFlow value stored as resolutionInvScale
/// @return constant buffer; fields not set here are zero-initialized
ConstantBuffer backend::getDefaultConstantBuffer(
        size_t index, size_t total,
        bool hdr, float invFlow) {
    ConstantBuffer buffer{}; // value-initialization zeroes every field

    buffer.advancedColorKind = hdr ? 2U : 0U;
    buffer.hdrSupport = hdr ? 1U : 0U;
    buffer.resolutionInvScale = invFlow;
    // (index + 1) / (total + 1)
    buffer.timestamp = static_cast<float>(index + 1) / static_cast<float>(total + 1);
    buffer.uiThreshold = 0.5F;

    return buffer;
}
|
||||
|
||||
/// shift both dimensions of an extent right by i bits
/// @param extent the extent to shift
/// @param i the amount to shift by
/// @return the shifted extent
VkExtent2D backend::shift_extent(VkExtent2D extent, uint32_t i) {
    // extent is a by-value copy, so it can be modified in place
    extent.width >>= i;
    extent.height >>= i;
    return extent;
}
|
||||
|
||||
/// add a to both dimensions of an extent, then shift them right by i bits
/// @param extent the extent to shift
/// @param a the amount to add before shifting
/// @param i the amount to shift by
/// @return the shifted extent
VkExtent2D backend::add_shift_extent(VkExtent2D extent, uint32_t a, uint32_t i) {
    // extent is a by-value copy, so it can be modified in place
    extent.width = (extent.width + a) >> i;
    extent.height = (extent.height + a) >> i;
    return extent;
}
|
||||
|
||||
/// format a device/vendor id as an uppercase hex string prefixed with "0x"
///
/// Ids that fit into 16 bits (PCI ids) are written with exactly four digits,
/// e.g. 0x10DE. Larger ids (Vulkan reports Khronos-assigned vendor ids at or
/// above 0x10000 for vendors without a PCI id) are written with all eight
/// digits instead of being truncated to their low 16 bits.
/// @param id the id to format
/// @return "0x" followed by four or eight hex digits
std::string backend::to_hex_id(uint32_t id) {
    static constexpr std::array<char, 16> digits{
        '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
    };

    // four digits cover 16 bits; use eight only when the upper half is set
    const uint32_t digitCount = (id > 0xFFFFU) ? 8U : 4U;

    std::string result = "0x";
    result.reserve(2U + digitCount);
    for (uint32_t shift = digitCount * 4U; shift != 0U;) {
        shift -= 4U; // most significant nibble first
        result += digits.at((id >> shift) & 0xFU);
    }
    return result;
}
|
||||
|
|
@ -1,82 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../extraction/shader_registry.hpp"
|
||||
#include "lsfg-vk-common/helpers/pointers.hpp"
|
||||
#include "lsfg-vk-common/vulkan/buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/descriptor_pool.hpp"
|
||||
#include "lsfg-vk-common/vulkan/sampler.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
namespace lsfgvk::backend {
|
||||
/// exposed context data
|
||||
struct Ctx {
|
||||
ls::R<const vk::Vulkan> vk; // safe back reference
|
||||
ls::R<const ShaderRegistry> shaders; // safe back reference
|
||||
|
||||
vk::DescriptorPool pool;
|
||||
|
||||
vk::Buffer constantBuffer;
|
||||
std::vector<vk::Buffer> constantBuffers;
|
||||
vk::Sampler bnbSampler; //!< border, no compare, black
|
||||
vk::Sampler bnwSampler; //!< border, no compare, white
|
||||
vk::Sampler eabSampler; //!< edge, always compare, black
|
||||
|
||||
VkExtent2D sourceExtent;
|
||||
VkExtent2D flowExtent;
|
||||
|
||||
bool hdr;
|
||||
float flow;
|
||||
bool perf;
|
||||
size_t count;
|
||||
};
|
||||
|
||||
/// constant buffer used in shaders
/// NOTE(review): the field order presumably has to match the shader-side
/// layout - confirm before reordering or resizing anything
struct ConstantBuffer {
    std::array<uint32_t, 2> inputOffset;
    uint32_t firstIter;
    uint32_t firstIterS;
    uint32_t advancedColorKind;
    uint32_t hdrSupport;
    float resolutionInvScale;
    float timestamp;
    float uiThreshold;
    std::array<uint32_t, 3> pad; // trailing padding words
};
|
||||
|
||||
/// get a prefilled constant buffer
|
||||
/// @param index timestamp index
|
||||
/// @param total total amount of images
|
||||
/// @param hdr whether HDR is enabled
|
||||
/// @param invFlow inverted flow scale value
|
||||
/// @return prefilled constant buffer
|
||||
ConstantBuffer getDefaultConstantBuffer(
|
||||
size_t index, size_t total,
|
||||
bool hdr, float invFlow
|
||||
);
|
||||
|
||||
/// round down a VkExtent2D
|
||||
/// @param extent the extent to shift
|
||||
/// @param i the amount to shift by
|
||||
/// @return the shifted extent
|
||||
VkExtent2D shift_extent(VkExtent2D extent, uint32_t i);
|
||||
|
||||
/// round up a VkExtent2D
|
||||
/// @param extent the extent to shift
|
||||
/// @param a the amount to add before shifting
|
||||
/// @param i the amount to shift by
|
||||
/// @return the shifted extent
|
||||
VkExtent2D add_shift_extent(VkExtent2D extent, uint32_t a, uint32_t i);
|
||||
|
||||
/// format a device/vendor id as a hex string
/// @param id the id to format
/// @return the id as a hex string
std::string to_hex_id(uint32_t id);
|
||||
}
|
||||
|
|
@ -1,666 +1,241 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "lsfg-vk-backend/lsfgvk.hpp"
|
||||
#include "extraction/dll_reader.hpp"
|
||||
#include "extraction/shader_registry.hpp"
|
||||
#include "helpers/limits.hpp"
|
||||
#include "helpers/utils.hpp"
|
||||
#include "lsfg-vk-common/helpers/errors.hpp"
|
||||
#include "lsfg-vk-common/helpers/pointers.hpp"
|
||||
#include "lsfg-vk-common/vulkan/buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/fence.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/timeline_semaphore.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
#include "shaderchains/alpha0.hpp"
|
||||
#include "shaderchains/alpha1.hpp"
|
||||
#include "shaderchains/beta0.hpp"
|
||||
#include "shaderchains/beta1.hpp"
|
||||
#include "shaderchains/delta0.hpp"
|
||||
#include "shaderchains/delta1.hpp"
|
||||
#include "shaderchains/gamma0.hpp"
|
||||
#include "shaderchains/gamma1.hpp"
|
||||
#include "shaderchains/generate.hpp"
|
||||
#include "shaderchains/mipmaps.hpp"
|
||||
#include "lsfgvk.hpp"
|
||||
#include "modules/library.hpp"
|
||||
#include "modules/pipeline.hpp"
|
||||
#include "utility/pipelines.hpp"
|
||||
#include "utility/vkhelper.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <exception>
|
||||
#include <filesystem>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
#ifdef LSFGVK_TESTING_RENDERDOC
|
||||
#include <renderdoc_app.h>
|
||||
#include <dlfcn.h>
|
||||
#endif
|
||||
|
||||
using namespace lsfgvk;
|
||||
using namespace lsfgvk::backend;
|
||||
|
||||
namespace lsfgvk::backend {
|
||||
error::error(const std::string& msg, const std::exception& inner)
|
||||
: std::runtime_error(msg + "\n- " + inner.what()) {}
|
||||
error::error(const std::string& msg)
|
||||
: std::runtime_error(msg) {}
|
||||
error::~error() = default;
|
||||
|
||||
/// instance class
|
||||
class InstanceImpl {
|
||||
public:
|
||||
/// create an instance
|
||||
/// (see lsfg-vk documentation)
|
||||
InstanceImpl(vk::PhysicalDeviceSelector selectPhysicalDevice,
|
||||
const std::filesystem::path& shaderDllPath,
|
||||
bool allowLowPrecision);
|
||||
|
||||
/// get the Vulkan instance
|
||||
/// @return the Vulkan instance
|
||||
[[nodiscard]] const auto& getVulkan() const { return this->vk; }
|
||||
/// get the shader registry
|
||||
/// @return the shader registry
|
||||
[[nodiscard]] const auto& getShaderRegistry() const { return this->shaders; }
|
||||
#ifdef LSFGVK_TESTING_RENDERDOC
|
||||
/// get the RenderDoc API
|
||||
/// @return the RenderDoc API
|
||||
[[nodiscard]] const auto& getRenderDocAPI() const { return this->renderdoc; }
|
||||
#endif
|
||||
// Movable, non-copyable, custom destructor
|
||||
InstanceImpl(const InstanceImpl&) = delete;
|
||||
InstanceImpl& operator=(const InstanceImpl&) = delete;
|
||||
InstanceImpl(InstanceImpl&&) = default;
|
||||
InstanceImpl& operator=(InstanceImpl&&) = default;
|
||||
~InstanceImpl();
|
||||
private:
|
||||
vk::Vulkan vk;
|
||||
ShaderRegistry shaders;
|
||||
|
||||
#ifdef LSFGVK_TESTING_RENDERDOC
|
||||
std::optional<RENDERDOC_API_1_6_0> renderdoc;
|
||||
#endif
|
||||
};
|
||||
|
||||
/// context class
|
||||
class ContextImpl {
|
||||
public:
|
||||
/// create a context
|
||||
/// (see lsfg-vk documentation)
|
||||
ContextImpl(const InstanceImpl& instance,
|
||||
std::pair<int, int> sourceFds, const std::vector<int>& destFds, int syncFd,
|
||||
VkExtent2D extent, bool hdr, float flow, bool perf);
|
||||
|
||||
/// schedule frames
|
||||
/// (see lsfg-vk documentation)
|
||||
void scheduleFrames();
|
||||
private:
|
||||
std::pair<vk::Image, vk::Image> sourceImages;
|
||||
std::vector<vk::Image> destImages;
|
||||
vk::Image blackImage;
|
||||
|
||||
vk::TimelineSemaphore syncSemaphore; // imported
|
||||
vk::TimelineSemaphore prepassSemaphore;
|
||||
size_t idx{1};
|
||||
size_t fidx{0}; // real frame index
|
||||
|
||||
std::vector<vk::CommandBuffer> cmdbufs;
|
||||
vk::Fence cmdbufFence;
|
||||
|
||||
Ctx ctx;
|
||||
|
||||
Mipmaps mipmaps;
|
||||
std::array<Alpha0, 7> alpha0;
|
||||
std::array<Alpha1, 7> alpha1;
|
||||
Beta0 beta0;
|
||||
Beta1 beta1;
|
||||
struct Pass {
|
||||
std::vector<Gamma0> gamma0;
|
||||
std::vector<Gamma1> gamma1;
|
||||
|
||||
std::vector<Delta0> delta0;
|
||||
std::vector<Delta1> delta1;
|
||||
ls::lazy<Generate> generate;
|
||||
};
|
||||
std::vector<Pass> passes;
|
||||
};
|
||||
}
|
||||
|
||||
Instance::Instance(
|
||||
const DevicePicker& devicePicker,
|
||||
const std::filesystem::path& shaderDllPath,
|
||||
bool allowLowPrecision) {
|
||||
const auto selectFunc = [&devicePicker](const vk::VulkanInstanceFuncs funcs,
|
||||
const std::vector<VkPhysicalDevice>& devices) {
|
||||
for (const auto& device : devices) {
|
||||
// check if the physical device supports VK_EXT_pci_bus_info
|
||||
uint32_t ext_count{};
|
||||
funcs.EnumerateDeviceExtensionProperties(device, nullptr, &ext_count, VK_NULL_HANDLE);
|
||||
const std::string& deviceId,
|
||||
const std::filesystem::path& lsfgvkDllPath,
|
||||
bool allowFP16
|
||||
) {
|
||||
// Create Vulkan context
|
||||
auto dld{std::make_unique<vk::detail::DispatchLoaderDynamic>()};
|
||||
|
||||
std::vector<VkExtensionProperties> extensions(ext_count);
|
||||
funcs.EnumerateDeviceExtensionProperties(device, nullptr, &ext_count, extensions.data());
|
||||
auto instance{vkhelper::createInstance(*dld)};
|
||||
auto physdev{vkhelper::findPhysicalDevice(*dld, *instance, deviceId)};
|
||||
|
||||
const bool has_pci_ext = std::ranges::find_if(extensions,
|
||||
[](const VkExtensionProperties& ext) {
|
||||
return std::string(std::to_array(ext.extensionName).data())
|
||||
== VK_EXT_PCI_BUS_INFO_EXTENSION_NAME;
|
||||
}) != extensions.end();
|
||||
const uint32_t qfi{vkhelper::findComputeQueueFamilyIndex(*dld, physdev)};
|
||||
const bool fp16{allowFP16 && vkhelper::checkHalfPrecisionSupport(*dld, physdev)};
|
||||
|
||||
// then fetch all available properties
|
||||
VkPhysicalDevicePCIBusInfoPropertiesEXT pciInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT
|
||||
};
|
||||
VkPhysicalDeviceProperties2 props{
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
|
||||
.pNext = has_pci_ext ? &pciInfo : nullptr
|
||||
};
|
||||
funcs.GetPhysicalDeviceProperties2(device, &props);
|
||||
auto [device, queue] = vkhelper::createDevice(*dld, physdev, qfi, fp16);
|
||||
|
||||
std::array<char, 256> devname = std::to_array(props.properties.deviceName);
|
||||
devname.at(255) = '\0'; // ensure null-termination
|
||||
|
||||
if (devicePicker(
|
||||
std::string(devname.data()),
|
||||
{ backend::to_hex_id(props.properties.vendorID),
|
||||
backend::to_hex_id(props.properties.deviceID) },
|
||||
has_pci_ext ? std::optional<std::string>{
|
||||
std::to_string(pciInfo.pciBus) + ":" +
|
||||
std::to_string(pciInfo.pciDevice) + "." +
|
||||
std::to_string(pciInfo.pciFunction)
|
||||
} : std::nullopt
|
||||
))
|
||||
return device;
|
||||
}
|
||||
|
||||
throw ls::vulkan_error("no suitable physical device found");
|
||||
// Construct instance
|
||||
library::ShaderLibrary library{
|
||||
*dld,
|
||||
*device,
|
||||
fp16,
|
||||
lsfgvkDllPath
|
||||
};
|
||||
|
||||
this->m_impl = std::make_unique<InstanceImpl>(
|
||||
selectFunc, shaderDllPath, allowLowPrecision
|
||||
this->m_priv = std::make_unique<priv::Instance>(priv::Instance {
|
||||
.vk = {
|
||||
.dld = std::move(dld),
|
||||
.instance = std::move(instance),
|
||||
.physdev = physdev,
|
||||
.device = std::move(device),
|
||||
.queue = queue,
|
||||
.qfi = qfi,
|
||||
.fp16 = fp16
|
||||
},
|
||||
.shaderLibrary = std::move(library)
|
||||
});
|
||||
}
|
||||
|
||||
/// create a context on top of an existing instance
/// builds the pipeline first, then the private state with its
/// synchronization objects (two timeline semaphores and a fence)
Context::Context(
    const Instance& instance,
    uint32_t width,
    uint32_t height,
    float flowScale,
    bool performanceMode
) {
    auto& instancePriv{*instance.m_priv};
    const auto& vkState{instancePriv.vk};

    pipeline::Pipeline builtPipeline{
        *vkState.dld,
        *vkState.device,
        vkState.physdev,
        vkState.queue,
        vkState.qfi,
        instancePriv.shaderLibrary,
        lsfgvk::getPipelineSignature(performanceMode),
        { width, height },
        flowScale,
        performanceMode,
        false
    };

    // both semaphores are stored together with a counter that starts at 0
    this->m_priv = std::make_unique<priv::Context>(priv::Context {
        .instance = std::ref(instancePriv),
        .pipeline = std::move(builtPipeline),
        .syncSemaphore = {
            vkhelper::createTimelineSemaphore(*vkState.dld, *vkState.device, true), 0
        },
        .internalSemaphores = {
            vkhelper::createTimelineSemaphore(*vkState.dld, *vkState.device), 0
        },
        .fence = vkhelper::createFence(*vkState.dld, *vkState.device),
    });
}
|
||||
|
||||
/// export file descriptors for the context's shared resources
/// @return descriptors for the memory of the first external input, the memory
///         of the first external output and the sync semaphore
FileDescriptors Context::exportFds() const {
    auto& priv{*this->m_priv};
    const auto& vkState{priv.instance.get().vk};
    const auto& activePipeline{priv.pipeline};

    auto& dld{*vkState.dld};
    auto& device{*vkState.device};

    return{
        .sourceFd = vkhelper::exportMemoryFd(
            dld, device,
            activePipeline.getExternalInputs().front().memory
        ),
        .destinationFd = vkhelper::exportMemoryFd(
            dld, device,
            activePipeline.getExternalOutputs().front().memory
        ),
        .syncFd = vkhelper::exportSemaphoreFd(
            dld, device,
            *priv.syncSemaphore.first
        )
    };
}
|
||||
|
||||
void Context::dispatch(uint32_t total) {
|
||||
auto& ctx{*this->m_priv};
|
||||
const auto& vk{ctx.instance.get().vk};
|
||||
|
||||
// Increment iteration counter after previous frame is completed
|
||||
auto* mapped{ctx.pipeline.getMappedBuffer()};
|
||||
if (ctx.firstIteration) {
|
||||
ctx.firstIteration = false;
|
||||
mapped->iteration = 0;
|
||||
} else {
|
||||
if (vk.device->waitForFences(*ctx.fence, true, UINT64_MAX, *vk.dld) != vk::Result::eSuccess)
|
||||
throw std::runtime_error("Unable to wait for completion of previous iteration");
|
||||
vk.device->resetFences(*ctx.fence, *vk.dld);
|
||||
mapped->iteration++;
|
||||
}
|
||||
|
||||
const auto& cmdbufs{ctx.pipeline.getCmdbufs()};
|
||||
|
||||
// Dispatch pre-pass
|
||||
auto& sync{ctx.syncSemaphore};
|
||||
sync.second++;
|
||||
|
||||
auto& internal{ctx.internalSemaphores};
|
||||
internal.second++;
|
||||
|
||||
vk::TimelineSemaphoreSubmitInfo timelineInfo{
|
||||
.waitSemaphoreValueCount = 1,
|
||||
.pWaitSemaphoreValues = &sync.second,
|
||||
.signalSemaphoreValueCount = 1,
|
||||
.pSignalSemaphoreValues = &internal.second
|
||||
};
|
||||
|
||||
const vk::PipelineStageFlags waitStage{vk::PipelineStageFlagBits::eTopOfPipe};
|
||||
vk.queue.submit(
|
||||
{{
|
||||
.pNext = &timelineInfo,
|
||||
.waitSemaphoreCount = 1,
|
||||
.pWaitSemaphores = &*sync.first,
|
||||
.pWaitDstStageMask = &waitStage,
|
||||
.commandBufferCount = 1U,
|
||||
.pCommandBuffers = &*cmdbufs.at(0),
|
||||
.signalSemaphoreCount = 1,
|
||||
.pSignalSemaphores = &*internal.first
|
||||
}},
|
||||
nullptr,
|
||||
*vk.dld
|
||||
);
|
||||
}
|
||||
|
||||
namespace {
|
||||
/// find the cache file path
|
||||
std::filesystem::path findCacheFilePath() {
|
||||
const char* xdgCacheHome = std::getenv("XDG_CACHE_HOME");
|
||||
if (xdgCacheHome && *xdgCacheHome != '\0')
|
||||
return std::filesystem::path(xdgCacheHome) / "lsfg-vk_pipeline_cache.bin";
|
||||
// Dispatch main passes
|
||||
uint64_t prevInternal{};
|
||||
for (uint32_t i = 0; i < total; i++) {
|
||||
const auto& transCmdbuf{ctx.pipeline.buildTransCmdbuf(
|
||||
*vk.dld, *vk.device,
|
||||
mapped->iteration,
|
||||
i, total
|
||||
)};
|
||||
|
||||
const char* home = std::getenv("HOME");
|
||||
if (home && *home != '\0')
|
||||
return std::filesystem::path(home) / ".cache" / "lsfg-vk_pipeline_cache.bin";
|
||||
|
||||
return{"/tmp/lsfg-vk_pipeline_cache.bin"};
|
||||
}
|
||||
/// create a Vulkan instance
|
||||
vk::Vulkan createVulkanInstance(vk::PhysicalDeviceSelector selectPhysicalDevice) {
|
||||
try {
|
||||
return{
|
||||
"lsfg-vk", vk::version{2, 0, 0},
|
||||
"lsfg-vk-engine", vk::version{2, 0, 0},
|
||||
selectPhysicalDevice,
|
||||
false, std::nullopt,
|
||||
findCacheFilePath()
|
||||
};
|
||||
} catch (const std::exception& e) {
|
||||
throw backend::error("Unable to initialize Vulkan", e);
|
||||
}
|
||||
}
|
||||
/// build a shader registry
|
||||
ShaderRegistry createShaderRegistry(vk::Vulkan& vk,
|
||||
const std::filesystem::path& shaderDllPath,
|
||||
bool allowLowPrecision) {
|
||||
std::unordered_map<uint32_t, std::vector<uint8_t>> resources{};
|
||||
|
||||
try {
|
||||
resources = backend::extractResourcesFromDLL(shaderDllPath);
|
||||
} catch (const std::exception& e) {
|
||||
throw backend::error("Unable to parse Lossless Scaling DLL", e);
|
||||
// Transition command buffer to next timestamp
|
||||
if (i == 0) {
|
||||
prevInternal = internal.second;
|
||||
timelineInfo.pWaitSemaphoreValues = &prevInternal;
|
||||
} else {
|
||||
sync.second++;
|
||||
timelineInfo.pWaitSemaphoreValues = &sync.second;
|
||||
}
|
||||
|
||||
try {
|
||||
return backend::buildShaderRegistry(
|
||||
vk, allowLowPrecision && vk.supportsFP16(),
|
||||
resources
|
||||
);
|
||||
} catch (const std::exception& e) {
|
||||
throw backend::error("Unable to build shader registry", e);
|
||||
}
|
||||
}
|
||||
#ifdef LSFGVK_TESTING_RENDERDOC
|
||||
/// load RenderDoc integration
|
||||
std::optional<RENDERDOC_API_1_6_0> loadRenderDocIntegration() {
|
||||
void* module = dlopen("librenderdoc.so", RTLD_NOW | RTLD_NOLOAD);
|
||||
if (!module)
|
||||
return std::nullopt;
|
||||
internal.second++;
|
||||
timelineInfo.pSignalSemaphoreValues = &internal.second;
|
||||
|
||||
auto renderdocGetAPI = reinterpret_cast<pRENDERDOC_GetAPI>(
|
||||
dlsym(module, "RENDERDOC_GetAPI"));
|
||||
if (!renderdocGetAPI)
|
||||
return std::nullopt;
|
||||
vk.queue.submit(
|
||||
{{
|
||||
.pNext = &timelineInfo,
|
||||
.waitSemaphoreCount = 1,
|
||||
.pWaitSemaphores = i == 0 ? &*internal.first : &*sync.first,
|
||||
.pWaitDstStageMask = &waitStage,
|
||||
.commandBufferCount = 1,
|
||||
.pCommandBuffers = &transCmdbuf,
|
||||
.signalSemaphoreCount = 1,
|
||||
.pSignalSemaphores = &*internal.first
|
||||
}},
|
||||
nullptr,
|
||||
*vk.dld
|
||||
);
|
||||
|
||||
RENDERDOC_API_1_6_0* api{};
|
||||
renderdocGetAPI(eRENDERDOC_API_Version_1_6_0, reinterpret_cast<void**>(&api));
|
||||
if (!api)
|
||||
return std::nullopt;
|
||||
// Dispatch main pass
|
||||
timelineInfo.pWaitSemaphoreValues = &internal.second;
|
||||
|
||||
return *api;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
sync.second++;
|
||||
timelineInfo.pSignalSemaphoreValues = &sync.second;
|
||||
|
||||
/// create the Vulkan instance, then build the shader registry on top of it
/// low precision is only requested when the device reports FP16 support
InstanceImpl::InstanceImpl(vk::PhysicalDeviceSelector selectPhysicalDevice,
        const std::filesystem::path& shaderDllPath,
        bool allowLowPrecision)
    : vk(createVulkanInstance(selectPhysicalDevice)),
      shaders(createShaderRegistry(
          this->vk, shaderDllPath,
          allowLowPrecision && this->vk.supportsFP16())) {
#ifdef LSFGVK_TESTING_RENDERDOC
    this->renderdoc = loadRenderDocIntegration();
#endif
    this->vk.persistPipelineCache(); // will silently fail
}
|
||||
|
||||
/// create a new context and store it in this instance
/// @return reference to the stored context
Context& Instance::openContext(std::pair<int, int> sourceFds, const std::vector<int>& destFds,
        int syncFd, uint32_t width, uint32_t height,
        bool hdr, float flow, bool perf) {
    const VkExtent2D size{ width, height };

    auto created = std::make_unique<ContextImpl>(*this->m_impl,
        sourceFds, destFds, syncFd,
        size, hdr, flow, perf
    );
    // the instance keeps ownership, the caller only receives a reference
    auto& stored = this->m_contexts.emplace_back(std::move(created));
    return *stored.get();
}
|
||||
|
||||
namespace {
|
||||
/// import source images
|
||||
std::pair<vk::Image, vk::Image> importImages(const vk::Vulkan& vk,
|
||||
const std::pair<int, int>& sourceFds,
|
||||
VkExtent2D extent, VkFormat format) {
|
||||
try {
|
||||
return {
|
||||
vk::Image(vk, extent, format,
|
||||
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, sourceFds.first),
|
||||
vk::Image(vk, extent, format,
|
||||
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, sourceFds.second)
|
||||
};
|
||||
} catch (const std::exception& e) {
|
||||
throw backend::error("Unable to import destination images", e);
|
||||
}
|
||||
}
|
||||
/// import destination images
|
||||
std::vector<vk::Image> importImages(const vk::Vulkan& vk,
|
||||
const std::vector<int>& destFds,
|
||||
VkExtent2D extent, VkFormat format) {
|
||||
try {
|
||||
std::vector<vk::Image> destImages;
|
||||
destImages.reserve(destFds.size());
|
||||
|
||||
for (const auto& fd : destFds)
|
||||
destImages.emplace_back(vk, extent, format,
|
||||
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, fd);
|
||||
|
||||
return destImages;
|
||||
} catch (const std::exception& e) {
|
||||
throw backend::error("Unable to import destination images", e);
|
||||
}
|
||||
}
|
||||
/// create a black image
|
||||
vk::Image createBlackImage(const vk::Vulkan& vk) {
|
||||
try {
|
||||
return{vk,
|
||||
{ .width = 4, .height = 4 }
|
||||
};
|
||||
} catch (const std::exception& e) {
|
||||
throw backend::error("Unable to create black image", e);
|
||||
}
|
||||
}
|
||||
/// import timeline semaphore
|
||||
vk::TimelineSemaphore importTimelineSemaphore(const vk::Vulkan& vk, int syncFd) {
|
||||
try {
|
||||
return{vk, 0, syncFd};
|
||||
} catch (const std::exception& e) {
|
||||
throw backend::error("Unable to import timeline semaphore", e);
|
||||
}
|
||||
}
|
||||
/// create prepass semaphores
|
||||
vk::TimelineSemaphore createPrepassSemaphore(const vk::Vulkan& vk) {
|
||||
try {
|
||||
return{vk, 0};
|
||||
} catch (const std::exception& e) {
|
||||
throw backend::error("Unable to create prepass semaphore", e);
|
||||
}
|
||||
}
|
||||
/// create command buffers
|
||||
std::vector<vk::CommandBuffer> createCommandBuffers(const vk::Vulkan& vk, size_t count) {
|
||||
try {
|
||||
std::vector<vk::CommandBuffer> cmdbufs;
|
||||
cmdbufs.reserve(count);
|
||||
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
cmdbufs.emplace_back(vk);
|
||||
|
||||
return cmdbufs;
|
||||
} catch (const std::exception& e) {
|
||||
throw backend::error("Unable to create command buffers", e);
|
||||
}
|
||||
}
|
||||
/// create context data
|
||||
Ctx createCtx(const InstanceImpl& instance, VkExtent2D extent,
|
||||
bool hdr, float flow, bool perf, size_t count) {
|
||||
const auto& vk = instance.getVulkan();
|
||||
const auto& shaders = instance.getShaderRegistry();
|
||||
|
||||
try {
|
||||
std::vector<vk::Buffer> constantBuffers{};
|
||||
constantBuffers.reserve(count);
|
||||
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
constantBuffers.emplace_back(vk,
|
||||
backend::getDefaultConstantBuffer(
|
||||
i, count,
|
||||
hdr, flow
|
||||
)
|
||||
);
|
||||
|
||||
return {
|
||||
.vk = std::ref(vk),
|
||||
.shaders = std::ref(shaders),
|
||||
.pool{vk, backend::calculateDescriptorPoolLimits(count, perf)},
|
||||
.constantBuffer{vk, backend::getDefaultConstantBuffer(0, 1, hdr, flow)},
|
||||
.constantBuffers{std::move(constantBuffers)},
|
||||
.bnbSampler{vk, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, VK_COMPARE_OP_NEVER, false},
|
||||
.bnwSampler{vk, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, VK_COMPARE_OP_NEVER, true},
|
||||
.eabSampler{vk, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_COMPARE_OP_ALWAYS, false},
|
||||
.sourceExtent = extent,
|
||||
.flowExtent = VkExtent2D {
|
||||
.width = static_cast<uint32_t>(static_cast<float>(extent.width) / flow),
|
||||
.height = static_cast<uint32_t>(static_cast<float>(extent.height) / flow)
|
||||
},
|
||||
.hdr = hdr,
|
||||
.flow = flow,
|
||||
.perf = perf,
|
||||
.count = count
|
||||
};
|
||||
} catch (const std::exception& e) {
|
||||
throw backend::error("Unable to create context", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ContextImpl::ContextImpl(const InstanceImpl& instance,
|
||||
std::pair<int, int> sourceFds, const std::vector<int>& destFds, int syncFd,
|
||||
VkExtent2D extent, bool hdr, float flow, bool perf) :
|
||||
sourceImages(importImages(instance.getVulkan(), sourceFds,
|
||||
extent, hdr ? VK_FORMAT_R16G16B16A16_SFLOAT : VK_FORMAT_R8G8B8A8_UNORM)),
|
||||
destImages(importImages(instance.getVulkan(), destFds,
|
||||
extent, hdr ? VK_FORMAT_R16G16B16A16_SFLOAT : VK_FORMAT_R8G8B8A8_UNORM)),
|
||||
blackImage(createBlackImage(instance.getVulkan())),
|
||||
syncSemaphore(importTimelineSemaphore(instance.getVulkan(), syncFd)),
|
||||
prepassSemaphore(createPrepassSemaphore(instance.getVulkan())),
|
||||
cmdbufs(createCommandBuffers(instance.getVulkan(), destFds.size() + 1)),
|
||||
cmdbufFence(instance.getVulkan()),
|
||||
ctx(createCtx(instance, extent, hdr, flow, perf, destFds.size())),
|
||||
mipmaps(ctx, sourceImages),
|
||||
alpha0{
|
||||
Alpha0(ctx, mipmaps.getImages().at(0)),
|
||||
Alpha0(ctx, mipmaps.getImages().at(1)),
|
||||
Alpha0(ctx, mipmaps.getImages().at(2)),
|
||||
Alpha0(ctx, mipmaps.getImages().at(3)),
|
||||
Alpha0(ctx, mipmaps.getImages().at(4)),
|
||||
Alpha0(ctx, mipmaps.getImages().at(5)),
|
||||
Alpha0(ctx, mipmaps.getImages().at(6))
|
||||
},
|
||||
alpha1{
|
||||
Alpha1(ctx, 3, alpha0.at(0).getImages()),
|
||||
Alpha1(ctx, 2, alpha0.at(1).getImages()),
|
||||
Alpha1(ctx, 2, alpha0.at(2).getImages()),
|
||||
Alpha1(ctx, 2, alpha0.at(3).getImages()),
|
||||
Alpha1(ctx, 2, alpha0.at(4).getImages()),
|
||||
Alpha1(ctx, 2, alpha0.at(5).getImages()),
|
||||
Alpha1(ctx, 2, alpha0.at(6).getImages())
|
||||
},
|
||||
beta0(ctx, alpha1.at(0).getImages()),
|
||||
beta1(ctx, beta0.getImages()) {
|
||||
// build main passes
|
||||
for (size_t i = 0; i < destImages.size(); ++i) {
|
||||
auto& pass = this->passes.emplace_back();
|
||||
|
||||
pass.gamma0.reserve(7);
|
||||
pass.gamma1.reserve(7);
|
||||
pass.delta0.reserve(3);
|
||||
pass.delta1.reserve(3);
|
||||
for (size_t j = 0; j < 7; j++) {
|
||||
if (j == 0) { // first pass has no prior data
|
||||
pass.gamma0.emplace_back(ctx, i,
|
||||
this->alpha1.at(6 - j).getImages(),
|
||||
this->blackImage
|
||||
);
|
||||
pass.gamma1.emplace_back(ctx, i,
|
||||
pass.gamma0.at(j).getImages(),
|
||||
this->blackImage,
|
||||
this->beta1.getImages().at(5)
|
||||
);
|
||||
} else { // other passes use prior data
|
||||
pass.gamma0.emplace_back(ctx, i,
|
||||
this->alpha1.at(6 - j).getImages(),
|
||||
pass.gamma1.at(j - 1).getImage()
|
||||
);
|
||||
pass.gamma1.emplace_back(ctx, i,
|
||||
pass.gamma0.at(j).getImages(),
|
||||
pass.gamma1.at(j - 1).getImage(),
|
||||
this->beta1.getImages().at(6 - j)
|
||||
);
|
||||
}
|
||||
|
||||
if (j == 4) { // first special pass has no prior data
|
||||
pass.delta0.emplace_back(ctx, i,
|
||||
this->alpha1.at(6 - j).getImages(),
|
||||
this->blackImage,
|
||||
pass.gamma1.at(j - 1).getImage()
|
||||
);
|
||||
pass.delta1.emplace_back(ctx, i,
|
||||
pass.delta0.at(j - 4).getImages0(),
|
||||
pass.delta0.at(j - 4).getImages1(),
|
||||
this->blackImage,
|
||||
this->beta1.getImages().at(6 - j),
|
||||
this->blackImage
|
||||
);
|
||||
} else if (j > 4) { // further passes do
|
||||
pass.delta0.emplace_back(ctx, i,
|
||||
this->alpha1.at(6 - j).getImages(),
|
||||
pass.delta1.at(j - 5).getImage0(),
|
||||
pass.gamma1.at(j - 1).getImage()
|
||||
);
|
||||
pass.delta1.emplace_back(ctx, i,
|
||||
pass.delta0.at(j - 4).getImages0(),
|
||||
pass.delta0.at(j - 4).getImages1(),
|
||||
pass.delta1.at(j - 5).getImage0(),
|
||||
this->beta1.getImages().at(6 - j),
|
||||
pass.delta1.at(j - 5).getImage1()
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pass.generate.emplace(ctx, i,
|
||||
this->sourceImages,
|
||||
pass.gamma1.at(6).getImage(),
|
||||
pass.delta1.at(2).getImage0(),
|
||||
pass.delta1.at(2).getImage1(),
|
||||
this->destImages.at(i)
|
||||
vk.queue.submit(
|
||||
{{
|
||||
.pNext = &timelineInfo,
|
||||
.waitSemaphoreCount = 1,
|
||||
.pWaitSemaphores = &*internal.first,
|
||||
.pWaitDstStageMask = &waitStage,
|
||||
.commandBufferCount = 1,
|
||||
.pCommandBuffers = &*cmdbufs.at(1),
|
||||
.signalSemaphoreCount = 1,
|
||||
.pSignalSemaphores = &*sync.first
|
||||
}},
|
||||
i == (total - 1) ? *ctx.fence : nullptr,
|
||||
*vk.dld
|
||||
);
|
||||
}
|
||||
|
||||
// initialize all images
|
||||
std::vector<VkImage> images{};
|
||||
images.push_back(this->blackImage.handle());
|
||||
mipmaps.prepare(images);
|
||||
for (size_t i = 0; i < 7; ++i) {
|
||||
alpha0.at(i).prepare(images);
|
||||
alpha1.at(i).prepare(images);
|
||||
}
|
||||
beta0.prepare(images);
|
||||
beta1.prepare(images);
|
||||
for (const auto& pass : this->passes) {
|
||||
for (size_t i = 0; i < 7; ++i) {
|
||||
pass.gamma0.at(i).prepare(images);
|
||||
pass.gamma1.at(i).prepare(images);
|
||||
|
||||
if (i < 4) continue;
|
||||
pass.delta0.at(i - 4).prepare(images);
|
||||
pass.delta1.at(i - 4).prepare(images);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<vk::Barrier> barriers{};
|
||||
barriers.reserve(images.size());
|
||||
|
||||
for (const auto& image : images) {
|
||||
barriers.emplace_back(vk::Barrier {
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
|
||||
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
|
||||
.image = image,
|
||||
.subresourceRange = {
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.levelCount = 1,
|
||||
.layerCount = 1
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
const vk::CommandBuffer cmdbuf{ctx.vk};
|
||||
cmdbuf.begin(ctx.vk);
|
||||
cmdbuf.insertBarriers(ctx.vk, barriers);
|
||||
cmdbuf.end(ctx.vk);
|
||||
cmdbuf.submit(ctx.vk); // wait for completion
|
||||
}
|
||||
|
||||
void Instance::scheduleFrames(Context& context) { // NOLINT (static)
|
||||
#ifdef LSFGVK_TESTING_RENDERDOC
|
||||
const auto& impl = this->m_impl;
|
||||
if (impl->getRenderDocAPI()) {
|
||||
impl->getRenderDocAPI()->StartFrameCapture(
|
||||
RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(impl->getVulkan().inst()),
|
||||
nullptr);
|
||||
}
|
||||
#endif
|
||||
void Context::idle() const {
|
||||
const auto& ctx{*this->m_priv};
|
||||
const auto& vk{ctx.instance.get().vk};
|
||||
|
||||
vk.device->waitIdle(*vk.dld);
|
||||
}
|
||||
|
||||
Context::~Context() {
|
||||
try {
|
||||
context.scheduleFrames();
|
||||
} catch (const std::exception& e) {
|
||||
throw backend::error("Unable to schedule frames", e);
|
||||
// NOTE: This will freeze if the user didn't signal the sync semaphore high enough to
|
||||
// allow the pipeline to complete.
|
||||
this->idle();
|
||||
} catch (...) { // NOLINT (empty catch)
|
||||
// Not much we can do here..
|
||||
}
|
||||
#ifdef LSFGVK_TESTING_RENDERDOC
|
||||
if (impl->getRenderDocAPI()) {
|
||||
impl->getVulkan().df().DeviceWaitIdle(impl->getVulkan().dev());
|
||||
impl->getRenderDocAPI()->EndFrameCapture(
|
||||
RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(impl->getVulkan().inst()),
|
||||
nullptr);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void Context::scheduleFrames() {
|
||||
// wait for previous pre-pass to complete
|
||||
if (this->fidx && !this->cmdbufFence.wait(this->ctx.vk))
|
||||
throw backend::error("Timeout waiting for previous frame to complete");
|
||||
this->cmdbufFence.reset(this->ctx.vk);
|
||||
|
||||
// schedule pre-pass
|
||||
const auto& cmdbuf = this->cmdbufs.at(0);
|
||||
cmdbuf.begin(ctx.vk);
|
||||
|
||||
this->mipmaps.render(ctx.vk, cmdbuf, this->fidx);
|
||||
for (size_t i = 0; i < 7; ++i) {
|
||||
this->alpha0.at(6 - i).render(ctx.vk, cmdbuf);
|
||||
this->alpha1.at(6 - i).render(ctx.vk, cmdbuf, this->fidx);
|
||||
}
|
||||
this->beta0.render(ctx.vk, cmdbuf, this->fidx);
|
||||
this->beta1.render(ctx.vk, cmdbuf);
|
||||
|
||||
cmdbuf.end(ctx.vk);
|
||||
cmdbuf.submit(this->ctx.vk,
|
||||
{}, this->syncSemaphore.handle(), this->idx,
|
||||
{}, this->prepassSemaphore.handle(), this->idx
|
||||
);
|
||||
|
||||
this->idx++;
|
||||
|
||||
// schedule main passes
|
||||
for (size_t i = 0; i < this->destImages.size(); i++) {
|
||||
const auto& cmdbuf = this->cmdbufs.at(i + 1);
|
||||
cmdbuf.begin(ctx.vk);
|
||||
|
||||
const auto& pass = this->passes.at(i);
|
||||
for (size_t j = 0; j < 7; j++) {
|
||||
pass.gamma0.at(j).render(ctx.vk, cmdbuf, this->fidx);
|
||||
pass.gamma1.at(j).render(ctx.vk, cmdbuf);
|
||||
|
||||
if (j < 4) continue;
|
||||
pass.delta0.at(j - 4).render(ctx.vk, cmdbuf, this->fidx);
|
||||
pass.delta1.at(j - 4).render(ctx.vk, cmdbuf);
|
||||
}
|
||||
pass.generate->render(ctx.vk, cmdbuf, this->fidx);
|
||||
|
||||
cmdbuf.end(ctx.vk);
|
||||
cmdbuf.submit(this->ctx.vk,
|
||||
{}, this->prepassSemaphore.handle(), this->idx - 1,
|
||||
{}, this->syncSemaphore.handle(), this->idx + i,
|
||||
i == this->destImages.size() - 1 ? this->cmdbufFence.handle() : VK_NULL_HANDLE
|
||||
);
|
||||
}
|
||||
|
||||
this->idx += this->destImages.size();
|
||||
this->fidx++;
|
||||
/// Expose the raw Vulkan instance handle held by the private state.
VkInstance Instance::_instance() const {
    const auto& vk{this->m_priv->vk};
    return vk.instance.get();
}
|
||||
|
||||
void Instance::closeContext(const Context& context) {
|
||||
auto it = std::ranges::find_if(this->m_contexts,
|
||||
[context = &context](const std::unique_ptr<ContextImpl>& ctx) {
|
||||
return ctx.get() == context;
|
||||
});
|
||||
if (it == this->m_contexts.end())
|
||||
throw backend::error("attempted to close unknown context",
|
||||
std::runtime_error("no such context"));
|
||||
|
||||
const auto& vk = this->m_impl->getVulkan();
|
||||
vk.df().DeviceWaitIdle(vk.dev());
|
||||
|
||||
this->m_contexts.erase(it);
|
||||
/// Expose the raw Vulkan device handle held by the private state.
VkDevice Instance::_device() const {
    const auto& vk{this->m_priv->vk};
    return *vk.device;
}
|
||||
|
||||
Instance::~Instance() = default;
|
||||
|
||||
// leaking shenanigans
|
||||
|
||||
namespace {
|
||||
bool leaking{false}; // NOLINT (global variable)
|
||||
}
|
||||
|
||||
InstanceImpl::~InstanceImpl() {
|
||||
if (!leaking) return;
|
||||
|
||||
try {
|
||||
new vk::Vulkan(std::move(this->vk));
|
||||
} catch (...) {
|
||||
std::cerr << "lsfg-vk: failed to leak Vulkan instance\n";
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void backend::makeLeaking() {
|
||||
leaking = true;
|
||||
}
|
||||
|
|
|
|||
59
lsfg-vk-backend/src/lsfgvk.hpp
Normal file
59
lsfg-vk-backend/src/lsfgvk.hpp
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#define LSFGVK_PRIV
|
||||
#include "lsfg-vk/lsfgvk.hpp" // IWYU pragma: export
|
||||
|
||||
#include "modules/pipeline.hpp"
|
||||
#include "modules/library.hpp"
|
||||
#include "utility/vkhelper.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <utility>
|
||||
|
||||
namespace lsfgvk::priv {
|
||||
|
||||
|
||||
/// Internal state of lsfg-vk
|
||||
struct Instance {
|
||||
/// Vulkan context
|
||||
struct Vulkan {
|
||||
/// Vulkan dispatch loader
|
||||
std::unique_ptr<vk::detail::DispatchLoaderDynamic> dld;
|
||||
/// Vulkan instance (1.2)
|
||||
vk::UniqueInstance instance;
|
||||
/// Vulkan physical device
|
||||
vk::PhysicalDevice physdev;
|
||||
/// Vulkan device with synchronization2 (extension), external memory & semaphore
|
||||
/// fd (extension) and timeline semaphores (core) enabled
|
||||
vk::UniqueDevice device;
|
||||
/// Compute queue
|
||||
vk::Queue queue;
|
||||
/// Compute queue family index
|
||||
uint32_t qfi;
|
||||
/// Whether fp16 is enabled and supported (shaderFloat16 is enabled)
|
||||
bool fp16;
|
||||
} vk;
|
||||
/// Shader library
|
||||
library::ShaderLibrary shaderLibrary;
|
||||
};
|
||||
|
||||
/// Internal context for frame generation
|
||||
struct Context {
|
||||
/// Parent instance
|
||||
std::reference_wrapper<Instance> instance;
|
||||
/// Pipeline instance
|
||||
pipeline::Pipeline pipeline;
|
||||
/// Shared synchronization semaphores
|
||||
std::pair<vk::UniqueSemaphore, uint64_t> syncSemaphore;
|
||||
/// Internal synchronization semaphores
|
||||
std::pair<vk::UniqueSemaphore, uint64_t> internalSemaphores;
|
||||
/// Frames-in-flight fence
|
||||
vk::UniqueFence fence;
|
||||
/// Is first iteration
|
||||
bool firstIteration{true};
|
||||
};
|
||||
|
||||
}
|
||||
91
lsfg-vk-backend/src/modules/library.cpp
Normal file
91
lsfg-vk-backend/src/modules/library.cpp
Normal file
|
|
@ -0,0 +1,91 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "library.hpp"
|
||||
#include "library/dll.hpp"
|
||||
#include "utility/vkhelper.hpp"
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <filesystem>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
|
||||
/// Base shaders of the library as (name, index) pairs.
///
/// The loader looks these up without applying any precision or performance
/// offset; index 0 is replaced by a dedicated resource ID before the lookup.
constexpr std::array<std::pair<std::string_view, uint32_t>, 3> BASE_LIBRARY{{
    { "mipmaps",        0 },
    { "generate_8bit",  1 },
    { "generate_16bit", 2 }
}};
|
||||
|
||||
/// Non-base shaders of the library as (name, index) pairs.
///
/// The index is the starting point for the resource ID; the loader adds
/// fixed offsets to it to select the half-precision and performance variants.
constexpr std::array<std::pair<std::string_view, uint32_t>, 24> LIBRARY{{
    { "alpha0",   13 }, { "alpha1",   14 }, { "alpha2",   15 }, { "alpha3",   16 },

    { "beta0",    22 }, { "beta1",    23 }, { "beta2",    24 }, { "beta3",    25 },
    { "beta4",    26 },

    { "gamma0",    3 }, { "gamma1",    4 }, { "gamma2",    5 }, { "gamma3",    6 },
    { "gamma4",    7 },

    { "delta0",    8 }, { "delta1",    9 }, { "delta2",   10 }, { "delta3",   11 },
    { "delta4",   12 },

    { "epsilon0", 17 }, { "epsilon1", 18 }, { "epsilon2", 19 }, { "epsilon3", 20 },
    { "epsilon4", 21 }
}};
|
||||
|
||||
using namespace lsfgvk::library;
|
||||
|
||||
/// Build the shader library by creating one Vulkan shader module for every
/// entry of BASE_LIBRARY and two (quality and performance) for every entry
/// of LIBRARY, using the resources parsed from the given DLL.
///
/// @throws std::runtime_error if the DLL does not exist or a shader is missing
ShaderLibrary::ShaderLibrary(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    bool halfPrecision,
    const std::filesystem::path& dll
) {
    // Resource ID looked up for the base shader whose table index is 0
    constexpr uint32_t INDEX_ZERO_RESOURCE_ID{2147488584U};
    // Distance between the quality and performance resource of one shader
    constexpr uint32_t PERFORMANCE_OFFSET{24};
    // Distance between the full-precision and half-precision shader sets
    constexpr uint32_t HALF_PRECISION_OFFSET{48};

    if (!std::filesystem::exists(dll))
        throw std::runtime_error("The specified shader DLL does not exist");

    const auto resources = priv::parseDll(dll);

    // Base shaders have neither a precision nor a performance variant
    for (const auto& [name, idx] : BASE_LIBRARY) {
        const uint32_t resourceId{idx == 0 ? INDEX_ZERO_RESOURCE_ID : idx};

        const auto found{resources.find(resourceId)};
        if (found == resources.end()) {
            throw std::runtime_error(
                "Unable to find base shader '" + std::string(name) + "' in DLL"
            );
        }

        this->m_baseShaders[name] = vkhelper::createShaderModule(dld, device, found->second);
    }

    // The precision offset is the same for every remaining shader
    const uint32_t precisionOffset{halfPrecision ? HALF_PRECISION_OFFSET : 0U};
    for (const auto& [name, idx] : LIBRARY) {
        const uint32_t qualityId{idx + precisionOffset};
        const uint32_t performanceId{qualityId + PERFORMANCE_OFFSET};

        const auto quality{resources.find(qualityId)};
        const auto performance{resources.find(performanceId)};
        if (quality == resources.end() || performance == resources.end()) {
            throw std::runtime_error(
                "Unable to find shader '" + std::string(name) + "' in DLL"
            );
        }

        this->m_qualityShaders[name] =
            vkhelper::createShaderModule(dld, device, quality->second);
        this->m_performanceShaders[name] =
            vkhelper::createShaderModule(dld, device, performance->second);
    }
}
|
||||
67
lsfg-vk-backend/src/modules/library.hpp
Normal file
67
lsfg-vk-backend/src/modules/library.hpp
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "utility/vkhelper.hpp"
|
||||
|
||||
#include <filesystem>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
|
||||
namespace lsfgvk::library {
|
||||
|
||||
///
|
||||
/// The lsfg-vk shader library
|
||||
///
|
||||
class ShaderLibrary {
|
||||
public:
|
||||
///
|
||||
/// Create the shader library
|
||||
///
|
||||
/// @param dld Vulkan dynamic dispatch loader
|
||||
/// @param device Vulkan device
|
||||
/// @param halfPrecision Whether to load the half-precision shader variants
|
||||
/// @param dll Path to the shader DLL file
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
explicit ShaderLibrary(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
bool halfPrecision,
|
||||
const std::filesystem::path& dll
|
||||
);
|
||||
|
||||
///
|
||||
/// Get a base shader by name
|
||||
///
|
||||
/// @param name Shader name
|
||||
/// @return A reference to the shader
|
||||
/// @throws std::out_of_range if the shader is not found
|
||||
///
|
||||
[[nodiscard]] const auto& baseShader(std::string_view name) const {
|
||||
return this->m_baseShaders.at(name);
|
||||
}
|
||||
|
||||
///
|
||||
/// Get a shader by name
|
||||
///
|
||||
/// @param name Shader name
|
||||
/// @param perf Whether to get the performance variant of the shader
|
||||
/// @return A reference to the shader
|
||||
/// @throws std::out_of_range if the shader is not found
|
||||
///
|
||||
[[nodiscard]] const auto& shader(std::string_view name, bool perf) const {
|
||||
auto it{this->m_baseShaders.find(name)};
|
||||
if (it != this->m_baseShaders.end())
|
||||
return it->second;
|
||||
|
||||
return perf ? this->m_performanceShaders.at(name) : this->m_qualityShaders.at(name);
|
||||
}
|
||||
|
||||
private:
|
||||
std::unordered_map<std::string_view, vk::UniqueShaderModule> m_baseShaders;
|
||||
std::unordered_map<std::string_view, vk::UniqueShaderModule> m_qualityShaders;
|
||||
std::unordered_map<std::string_view, vk::UniqueShaderModule> m_performanceShaders;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
@ -1,31 +1,27 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "dll_reader.hpp"
|
||||
#include "lsfg-vk-common/helpers/errors.hpp"
|
||||
#include "dll.hpp"
|
||||
|
||||
#include <ios>
|
||||
#include <unordered_map>
|
||||
#include <filesystem>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <optional>
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <ios>
|
||||
#include <iostream>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <stdexcept>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <array>
|
||||
#include <span>
|
||||
|
||||
using namespace lsfgvk;
|
||||
using namespace lsfgvk::backend;
|
||||
|
||||
namespace {
|
||||
/// DOS file header
|
||||
struct DOSHeader {
|
||||
uint16_t magic; // 0x5A4D
|
||||
std::array<uint16_t, 29> pad;
|
||||
int32_t pe_offset; // file offset
|
||||
int32_t pe_offset; // File offset
|
||||
};
|
||||
|
||||
/// PE header
|
||||
|
|
@ -42,15 +38,15 @@ namespace {
|
|||
struct PEOptionalHeader {
|
||||
uint16_t magic; // 0x20B
|
||||
std::array<uint16_t, 63> pad4;
|
||||
std::pair<uint32_t, uint32_t> resource_table; // file offset/size
|
||||
std::pair<uint32_t, uint32_t> resource_table; // File offset/size
|
||||
};
|
||||
|
||||
/// Section header
|
||||
struct SectionHeader {
|
||||
std::array<uint16_t, 4> pad1;
|
||||
uint32_t vsize; // virtual
|
||||
uint32_t vsize; // Virtual
|
||||
uint32_t vaddress;
|
||||
uint32_t fsize; // raw
|
||||
uint32_t fsize; // Raw
|
||||
uint32_t foffset;
|
||||
std::array<uint16_t, 8> pad2;
|
||||
};
|
||||
|
|
@ -65,7 +61,7 @@ namespace {
|
|||
/// Resource directory entry
|
||||
struct ResourceDirectoryEntry {
|
||||
uint32_t id;
|
||||
uint32_t offset; // high bit = directory
|
||||
uint32_t offset; // High bit = Directory
|
||||
};
|
||||
|
||||
/// Resource data entry
|
||||
|
|
@ -74,68 +70,68 @@ namespace {
|
|||
uint32_t size;
|
||||
std::array<uint32_t, 2> pad;
|
||||
};
|
||||
}
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wunknown-warning-option"
|
||||
#pragma clang diagnostic ignored "-Wunsafe-buffer-usage-in-container"
|
||||
namespace {
|
||||
/// Safely cast a vector to a pointer of type T
|
||||
template<typename T>
|
||||
const T* safe_cast(const std::vector<uint8_t>& data, size_t offset) {
|
||||
const size_t end = offset + sizeof(T);
|
||||
const size_t end{offset + sizeof(T)};
|
||||
if (end > data.size() || end < offset)
|
||||
throw ls::error("buffer overflow/underflow during safe cast");
|
||||
return reinterpret_cast<const T*>(&data.at(offset));
|
||||
throw std::runtime_error("Buffer overflow/underflow during safe cast");
|
||||
return reinterpret_cast<const T*>(&data.at(offset)); // NOLINT (unsafe cast)
|
||||
}
|
||||
|
||||
/// Safely cast a vector to a span of T
|
||||
template<typename T>
|
||||
std::span<const T> span_cast(const std::vector<uint8_t>& data, size_t offset, size_t count) {
|
||||
const size_t end = offset + (count * sizeof(T));
|
||||
const size_t end{offset + (count * sizeof(T))};
|
||||
if (end > data.size() || end < offset)
|
||||
throw ls::error("buffer overflow/underflow during safe cast");
|
||||
return std::span<const T>(reinterpret_cast<const T*>(&data.at(offset)), count);
|
||||
throw std::runtime_error("Buffer overflow/underflow during safe cast");
|
||||
return{ reinterpret_cast<const T*>(&data.at(offset)), count }; // NOLINT (unsafe cast)
|
||||
}
|
||||
}
|
||||
#pragma clang diagnostic pop
|
||||
}
|
||||
|
||||
std::unordered_map<uint32_t, std::vector<uint8_t>> backend::extractResourcesFromDLL(
|
||||
const std::filesystem::path& dll) {
|
||||
using namespace lsfgvk::library;
|
||||
|
||||
std::unordered_map<uint32_t, std::vector<uint32_t>> priv::parseDll(
|
||||
const std::filesystem::path& dll
|
||||
) {
|
||||
std::ifstream file(dll, std::ios::binary | std::ios::ate);
|
||||
if (!file.is_open())
|
||||
throw ls::error("failed to open dll file");
|
||||
throw std::runtime_error("Unable to open file");
|
||||
|
||||
const std::streamsize size = static_cast<std::streamsize>(file.tellg());
|
||||
const std::streamsize size{static_cast<std::streamsize>(file.tellg())};
|
||||
file.seekg(0, std::ios::beg);
|
||||
|
||||
std::vector<uint8_t> data(static_cast<size_t>(size));
|
||||
if (!file.read(reinterpret_cast<char*>(data.data()), size))
|
||||
throw ls::error("failed to read dll file");
|
||||
if (!file.read(reinterpret_cast<char*>(data.data()), size)) // NOLINT (unsafe cast)
|
||||
throw std::runtime_error("Unable to read file");
|
||||
|
||||
// parse dos header
|
||||
size_t fileOffset = 0;
|
||||
const auto* dosHdr = safe_cast<const DOSHeader>(data, 0);
|
||||
// Parse dos header
|
||||
size_t fileOffset{0};
|
||||
const auto* dosHdr{safe_cast<const DOSHeader>(data, 0)};
|
||||
if (dosHdr->magic != 0x5A4D)
|
||||
throw ls::error("dos header magic number is incorrect");
|
||||
throw std::runtime_error("Magic number in DOS header is incorrect");
|
||||
|
||||
// parse pe header
|
||||
// Parse pe header
|
||||
fileOffset += static_cast<size_t>(dosHdr->pe_offset);
|
||||
const auto* peHdr = safe_cast<const PEHeader>(data, fileOffset);
|
||||
const auto* peHdr{safe_cast<const PEHeader>(data, fileOffset)};
|
||||
if (peHdr->signature != 0x00004550)
|
||||
throw ls::error("pe header signature is incorrect");
|
||||
throw std::runtime_error("Signature in PE header is incorrect");
|
||||
|
||||
// parse optional pe header
|
||||
// Parse optional pe header
|
||||
fileOffset += sizeof(PEHeader);
|
||||
const auto* peOptHdr = safe_cast<const PEOptionalHeader>(data, fileOffset);
|
||||
const auto* peOptHdr{safe_cast<const PEOptionalHeader>(data, fileOffset)};
|
||||
if (peOptHdr->magic != 0x20B)
|
||||
throw ls::error("pe format is not PE32+");
|
||||
throw std::runtime_error("PE format is not PE32+");
|
||||
const auto& [rsrc_rva, rsrc_size] = peOptHdr->resource_table;
|
||||
|
||||
// locate section containing resources
|
||||
// Locate section containing resources
|
||||
std::optional<size_t> rsrc_offset;
|
||||
fileOffset += peHdr->opt_hdr_size;
|
||||
const auto sectHdrs = span_cast<const SectionHeader>(data, fileOffset, peHdr->sect_count);
|
||||
const auto sectHdrs{span_cast<const SectionHeader>(data, fileOffset, peHdr->sect_count)};
|
||||
for (const auto& sectHdr : sectHdrs) {
|
||||
if (rsrc_rva < sectHdr.vaddress || rsrc_rva > (sectHdr.vaddress + sectHdr.vsize))
|
||||
continue;
|
||||
|
|
@ -144,69 +140,71 @@ std::unordered_map<uint32_t, std::vector<uint8_t>> backend::extractResourcesFrom
|
|||
break;
|
||||
}
|
||||
if (!rsrc_offset)
|
||||
throw ls::error("unable to locate resource section");
|
||||
throw std::runtime_error("Unable to locate resource section");
|
||||
|
||||
// parse resource directory
|
||||
// Parse resource directory
|
||||
fileOffset = rsrc_offset.value();
|
||||
const auto* rsrcDir = safe_cast<const ResourceDirectory>(data, fileOffset);
|
||||
const auto* rsrcDir{safe_cast<const ResourceDirectory>(data, fileOffset)};
|
||||
if (rsrcDir->id_count < 3)
|
||||
throw ls::error("resource directory does not have enough entries");
|
||||
throw std::runtime_error("Resource directory does not have enough entries");
|
||||
|
||||
// find resource table with data type
|
||||
// Find resource table with data type
|
||||
std::optional<size_t> rsrc_tbl_offset;
|
||||
fileOffset = rsrc_offset.value() + sizeof(ResourceDirectory);
|
||||
const auto rsrcDirEntries = span_cast<const ResourceDirectoryEntry>(
|
||||
data, fileOffset, rsrcDir->name_count + rsrcDir->id_count);
|
||||
const auto rsrcDirEntries{span_cast<const ResourceDirectoryEntry>(
|
||||
data, fileOffset, rsrcDir->name_count + rsrcDir->id_count)};
|
||||
for (const auto& rsrcDirEntry : rsrcDirEntries) {
|
||||
if (rsrcDirEntry.id != 10) // RT_RCDATA
|
||||
continue;
|
||||
if ((rsrcDirEntry.offset & 0x80000000) == 0)
|
||||
throw ls::error("expected resource directory, found data entry");
|
||||
throw std::runtime_error("Expected resource directory, found data entry");
|
||||
|
||||
rsrc_tbl_offset.emplace(rsrcDirEntry.offset & 0x7FFFFFFF);
|
||||
}
|
||||
if (!rsrc_tbl_offset)
|
||||
throw ls::error("unabele to locate RT_RCDATA directory");
|
||||
throw std::runtime_error("Unable to locate RT_RCDATA directory");
|
||||
|
||||
// parse data type resource directory
|
||||
// Parse data type resource directory
|
||||
fileOffset = rsrc_offset.value() + rsrc_tbl_offset.value();
|
||||
const auto* rsrcTbl = safe_cast<const ResourceDirectory>(data, fileOffset);
|
||||
const auto* rsrcTbl{safe_cast<const ResourceDirectory>(data, fileOffset)};
|
||||
if (rsrcTbl->id_count < 1)
|
||||
throw ls::error("RT_RCDATA directory does not have enough entries");
|
||||
throw std::runtime_error("RT_RCDATA directory does not have enough entries");
|
||||
|
||||
// collect all resources
|
||||
// Collect all resources
|
||||
fileOffset += sizeof(ResourceDirectory);
|
||||
const auto rsrcTblEntries = span_cast<const ResourceDirectoryEntry>(
|
||||
data, fileOffset, rsrcTbl->name_count + rsrcTbl->id_count);
|
||||
std::unordered_map<uint32_t, std::vector<uint8_t>> resources;
|
||||
const auto rsrcTblEntries{span_cast<const ResourceDirectoryEntry>(
|
||||
data, fileOffset, rsrcTbl->name_count + rsrcTbl->id_count)};
|
||||
|
||||
std::unordered_map<uint32_t, std::vector<uint32_t>> resources;
|
||||
resources.reserve(rsrcTbl->id_count);
|
||||
|
||||
for (const auto& rsrcTblEntry : rsrcTblEntries) {
|
||||
if ((rsrcTblEntry.offset & 0x80000000) == 0)
|
||||
throw ls::error("expected resource directory, found data entry");
|
||||
throw std::runtime_error("Expected resource directory, found data entry");
|
||||
|
||||
// skip over language directory
|
||||
// Skip over language directory
|
||||
fileOffset = rsrc_offset.value() + (rsrcTblEntry.offset & 0x7FFFFFFF);
|
||||
const auto* langDir = safe_cast<const ResourceDirectory>(data, fileOffset);
|
||||
if (langDir->id_count < 1)
|
||||
throw ls::error("Incorrect language directory");
|
||||
throw std::runtime_error("Malformed language directory");
|
||||
|
||||
fileOffset += sizeof(ResourceDirectory);
|
||||
const auto* langDirEntry = safe_cast<const ResourceDirectoryEntry>(data, fileOffset);
|
||||
const auto* langDirEntry{safe_cast<const ResourceDirectoryEntry>(data, fileOffset)};
|
||||
if ((langDirEntry->offset & 0x80000000) != 0)
|
||||
throw ls::error("expected resource data entry, but found directory");
|
||||
throw std::runtime_error("Expected resource data entry, found directory");
|
||||
|
||||
// parse resource data entry
|
||||
// Parse resource data entry
|
||||
fileOffset = rsrc_offset.value() + (langDirEntry->offset & 0x7FFFFFFF);
|
||||
const auto* entry = safe_cast<const ResourceDataEntry>(data, fileOffset);
|
||||
const auto* entry{safe_cast<const ResourceDataEntry>(data, fileOffset)};
|
||||
if (entry->offset < rsrc_rva || entry->offset > (rsrc_rva + rsrc_size))
|
||||
throw ls::error("resource data entry points outside resource section");
|
||||
throw std::runtime_error("Resource data entry points outside resource section");
|
||||
|
||||
// extract resource
|
||||
std::vector<uint8_t> resource(entry->size);
|
||||
// Extract resource
|
||||
fileOffset = (entry->offset - rsrc_rva) + rsrc_offset.value();
|
||||
if (fileOffset + entry->size > data.size())
|
||||
throw ls::error("resource data entry points outside file");
|
||||
std::copy_n(&data.at(fileOffset), entry->size, resource.data());
|
||||
resources.emplace(rsrcTblEntry.id, std::move(resource));
|
||||
const auto rdata{span_cast<const uint32_t>(
|
||||
data, fileOffset, entry->size / sizeof(uint32_t))};
|
||||
|
||||
resources.emplace(rsrcTblEntry.id, std::vector<uint32_t>(rdata.begin(), rdata.end()));
|
||||
}
|
||||
|
||||
return resources;
|
||||
23
lsfg-vk-backend/src/modules/library/dll.hpp
Normal file
23
lsfg-vk-backend/src/modules/library/dll.hpp
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <filesystem>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
namespace lsfgvk::library::priv {

    ///
    /// Read a DLL file and collect the resources it contains
    ///
    /// @param dll Path of the DLL file to read
    /// @returns Map from resource ID to the resource data as 32-bit words
    /// @throws std::runtime_error if the file is invalid or cannot be read
    ///
    std::unordered_map<uint32_t, std::vector<uint32_t>> parseDll(
        const std::filesystem::path& dll
    );

}
|
||||
838
lsfg-vk-backend/src/modules/pipeline.cpp
Normal file
838
lsfg-vk-backend/src/modules/pipeline.cpp
Normal file
|
|
@ -0,0 +1,838 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "pipeline.hpp"
|
||||
#include "library.hpp"
|
||||
#include "modules/pipeline/signature.hpp"
|
||||
#include "modules/pipeline/signature/helpers.hpp"
|
||||
#include "modules/pipeline/signature/image.hpp"
|
||||
#include "modules/pipeline/signature/pass.hpp"
|
||||
#include "utility/vkhelper.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
using namespace lsfgvk::pipeline;
|
||||
|
||||
namespace {
|
||||
/// Helper method to apply extent operations
|
||||
vk::Extent2D apply(
|
||||
const vk::Extent2D& base,
|
||||
const vk::Extent2D& flow,
|
||||
const ExtentOp& op
|
||||
) {
|
||||
vk::Extent2D result{op.flow() ? flow : base};
|
||||
for (const auto& [add, shift] : op.operations()) {
|
||||
result.width = (result.width + add) >> shift;
|
||||
result.height = (result.height + add) >> shift;
|
||||
}
|
||||
return { result.width, result.height };
|
||||
}
|
||||
}
|
||||
|
||||
///
/// Create a new pipeline
///
/// Construction happens in this order: descriptor set layout, images and
/// their memory, image views, descriptor set (uniform buffer, samplers, image
/// bindings), compute pipelines, execution stages, initial layout transition
/// and finally the pre-recorded command buffers.
///
/// @throws std::runtime_error if the images share no memory type or the
///         initial layout transition fence is not signalled in time
///
Pipeline::Pipeline(
        const vk::detail::DispatchLoaderDynamic& dld,
        const vk::Device& device,
        const vk::PhysicalDevice& physdev,
        const vk::Queue& queue,
        uint32_t queueFamilyIndex,
        const library::ShaderLibrary& library,
        const PipelineSignature& signature,
        vk::Extent2D extent,
        float flow,
        bool perf,
        bool hdr
) {
    // Whether an image is an external input or output (these get dedicated memory)
    const auto isExternal = [](const Image& img) {
        return static_cast<bool>(
            img.signature.flags & (ImageFlag::ExternalInput | ImageFlag::ExternalOutput)
        );
    };

    // Color subresource range covering every layer of a sub-image
    const auto colorRange = [](const Image& img) -> vk::ImageSubresourceRange {
        return {
            .aspectMask = vk::ImageAspectFlagBits::eColor,
            .levelCount = 1,
            .layerCount = img.subimages.size() == 1 ? img.signature.count : 1
        };
    };

    // Compute-to-compute barrier for a single sub-image
    const auto makeBarrier = [&](
            const Image& img,
            const SubImage& sub,
            vk::AccessFlagBits2 srcAccess,
            vk::AccessFlagBits2 dstAccess) -> vk::ImageMemoryBarrier2KHR {
        return {
            .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader,
            .srcAccessMask = srcAccess,
            .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
            .dstAccessMask = dstAccess,
            .image = *sub.image,
            .subresourceRange = colorRange(img)
        };
    };

    // Build the Vulkan descriptor set layout
    uint32_t numSampledDescriptors{};
    uint32_t numStorageDescriptors{};

    std::vector<vk::DescriptorSetLayoutBinding> layoutBindings;
    layoutBindings.reserve(4 + signature.descriptors.size());

    // Binding 0 holds the uniform buffer, bindings 1 through 3 the samplers
    for (uint32_t slot = 0; slot < 4; slot++) {
        layoutBindings.push_back({
            .binding = slot,
            .descriptorType = (slot == 0)
                ? vk::DescriptorType::eUniformBuffer
                : vk::DescriptorType::eSampler,
            .descriptorCount = 1,
            .stageFlags = vk::ShaderStageFlagBits::eCompute
        });
    }

    // Image bindings follow, numbered from 4 in signature order
    for (const auto& descriptor : signature.descriptors) {
        const bool isStorage{descriptor.type == BindingType::StorageImage};

        auto count{static_cast<uint32_t>(descriptor.resources.size())};
        if (count == 1) {
            // A lone mipmapped resource contributes one descriptor per sub-image
            const auto& only{signature.images.at(descriptor.resources.front())};
            if (only.flags & ImageFlag::Mipmaps)
                count = only.count;
        }

        layoutBindings.push_back({
            .binding = static_cast<uint32_t>(layoutBindings.size()),
            .descriptorType = isStorage
                ? vk::DescriptorType::eStorageImage
                : vk::DescriptorType::eSampledImage,
            .descriptorCount = count,
            .stageFlags = vk::ShaderStageFlagBits::eCompute
        });

        (isStorage ? numStorageDescriptors : numSampledDescriptors) += count;
    }

    auto [setLayout, pipeLayout] = vkhelper::createLayout(
        dld,
        device,
        layoutBindings,
        sizeof(PushConstants)
    );
    this->m_layout.layout = std::move(setLayout);
    this->m_layout.pipelineLayout = std::move(pipeLayout);

    // Create the Vulkan images
    vk::DeviceSize sharedAlignment{};
    uint32_t memoryTypeMask{~0U};

    const vk::Extent2D flowExtent{
        .width = static_cast<uint32_t>(static_cast<float>(extent.width) * flow),
        .height = static_cast<uint32_t>(static_cast<float>(extent.height) * flow)
    };
    const vk::ImageUsageFlags baseUsage{
        vk::ImageUsageFlagBits::eStorage | vk::ImageUsageFlagBits::eSampled
    };

    for (const auto& imageSignature : signature.images) {
        const auto imageIdx{this->m_images.size()};
        auto& image{this->m_images.emplace_back()};
        image.signature = imageSignature;

        const auto& sig{image.signature};
        const bool external{isExternal(image)};
        const bool mipmapped{sig.flags & ImageFlag::Mipmaps};
        const bool useHdrFormat{hdr && static_cast<bool>(sig.flags & ImageFlag::HdrVariant)};
        const auto format{
            static_cast<vk::Format>(useHdrFormat ? sig.hdrFormat : sig.format)
        };
        const vk::Extent2D baseExtent{apply(extent, flowExtent, sig.extentOp)};

        for (uint32_t level = 0; level < sig.count; level++) {
            // Each successive level halves the extent, clamped to one texel
            const vk::Extent2D levelExtent{
                .width = std::max(baseExtent.width >> level, 1U),
                .height = std::max(baseExtent.height >> level, 1U)
            };

            if (!external) {
                image.subimages.push_back({
                    .image = vkhelper::createImage(
                        dld,
                        device,
                        levelExtent,
                        format,
                        mipmapped ? 1 : sig.count,
                        baseUsage
                    )
                });

                if (mipmapped)
                    continue; // One single-layer image per level
                break; // One (possibly layered) image is all that is needed
            }

            const bool isInput{sig.flags & ImageFlag::ExternalInput};

            auto [handle, memory] = vkhelper::createExternalImage(
                dld,
                device,
                physdev,
                levelExtent,
                format,
                sig.count,
                baseUsage |
                    (isInput ?
                        vk::ImageUsageFlagBits::eTransferDst
                        : vk::ImageUsageFlagBits::eTransferSrc)
            );

            // Publish non-owning handles for the caller
            auto& exported{isInput ? this->m_externalInputs : this->m_externalOutputs};
            exported.push_back({
                .extent = levelExtent,
                .format = format,
                .layers = sig.count,
                .image = *handle,
                .memory = *memory
            });

            image.subimages.push_back({
                .image = std::move(handle)
            });
            this->m_externalAllocations[imageIdx] = std::move(memory);

            break; // There can only be one image
        }

        for (auto& subimage : image.subimages) {
            subimage.memory = device.getImageMemoryRequirements(*subimage.image, dld);

            // External images do not take part in the shared allocations
            if (external)
                break;

            sharedAlignment = std::max(sharedAlignment, subimage.memory.alignment);
            memoryTypeMask &= subimage.memory.memoryTypeBits;
        }
    }

    if (memoryTypeMask == 0)
        throw std::runtime_error("No compatible memory type found for pipeline images");

    // Fill in image sizes in respect to alignment
    for (auto& image : this->m_images) {
        if (isExternal(image))
            continue; // External images have dedicated allocations

        for (const auto& subimage : image.subimages)
            image.size += vkhelper::align(subimage.memory.size, sharedAlignment);
    }

    // Calculate optimal-ish allocations using heuristics & greedy fit strategy
    std::vector<size_t> bySize(signature.images.size());
    std::iota(bySize.begin(), bySize.end(), 0);

    // Largest images are placed first
    std::ranges::sort(bySize, [&](const auto& lhs, const auto& rhs) {
        return this->m_images.at(lhs).size > this->m_images.at(rhs).size;
    });

    // Indices into allocation 0's segments, kept ordered by offset
    std::vector<size_t> placements;
    for (const auto& imageIdx : bySize) {
        const auto& image{this->m_images.at(imageIdx)};
        if (isExternal(image))
            continue;

        // Pinned images go to allocation 1, everything else to allocation 0
        const bool pinned{image.signature.flags & ImageFlag::Pinned};
        auto& allocation{this->m_allocations.at(pinned ? 1 : 0)};

        auto& segment{allocation.segments.emplace_back()};
        segment.imageIdx = imageIdx;
        for (const auto& subimage : image.subimages) {
            const vk::DeviceSize alignedSize{
                vkhelper::align(subimage.memory.size, sharedAlignment)
            };
            segment.subsegments.push_back({
                .size = alignedSize,
                .offset = segment.size
            });
            segment.size += alignedSize;
        }

        if (pinned) {
            // Pinned images are simply appended and never share memory
            segment.offset = allocation.size;
            allocation.size += segment.size;
            continue;
        }

        const auto lifetime{image.signature.lifetime};

        // Walk the placed segments bottom-up, moving past each one that
        // overlaps both in lifetime and in the candidate address range
        vk::DeviceSize offset{};
        for (const auto& placedIdx : placements) {
            const auto& placed{allocation.segments.at(placedIdx)};
            if (placed.imageIdx == imageIdx)
                continue; // Skip self

            const auto& placedLifetime{
                this->m_images.at(placed.imageIdx).signature.lifetime
            };
            const vk::DeviceSize placedEnd{placed.offset + placed.size};

            const bool sharesTime{
                lifetime.first <= placedLifetime.second &&
                lifetime.second >= placedLifetime.first
            };
            const bool sharesSpace{
                offset < placedEnd &&
                placed.offset < (offset + segment.size)
            };

            if (sharesTime && sharesSpace)
                offset = placedEnd;
        }

        allocation.size = std::max(allocation.size, offset + segment.size);
        segment.offset = offset;

        const size_t segmentIdx{allocation.segments.size() - 1};
        auto slot{std::ranges::upper_bound(placements, segmentIdx,
            [&](const auto& lhs, const auto& rhs) {
                return allocation.segments.at(lhs).offset < allocation.segments.at(rhs).offset;
            }
        )};
        placements.insert(slot, segmentIdx);
    }

    // Allocate the memory & bind the images
    for (auto& allocation : this->m_allocations) {
        allocation.memory = vkhelper::allocateMemory(
            dld,
            device,
            physdev,
            allocation.size,
            memoryTypeMask
        );

        for (const auto& segment : allocation.segments) {
            const auto& image{this->m_images.at(segment.imageIdx)};

            for (size_t sub = 0; sub < image.subimages.size(); sub++) {
                const auto& subsegment{segment.subsegments.at(sub)};
                const auto& subimage{image.subimages.at(sub)};

                device.bindImageMemory(
                    *subimage.image,
                    *allocation.memory,
                    segment.offset + subsegment.offset,
                    dld
                );
            }
        }
    }

    // Create image views
    for (auto& image : this->m_images) {
        const bool hasHdrVariant{image.signature.flags & ImageFlag::HdrVariant};
        const bool isLayered{image.subimages.size() == 1 && image.signature.count > 1};

        const auto viewFormat{
            static_cast<vk::Format>((hasHdrVariant && hdr)
                ? image.signature.hdrFormat : image.signature.format)
        };

        for (auto& subimage : image.subimages) {
            subimage.view = vkhelper::createImageView(
                dld,
                device,
                *subimage.image,
                viewFormat,
                isLayered ? image.signature.count : 1
            );
        }
    }

    // Create the descriptor set & required resources
    auto [descriptorPool, descriptorSet] = vkhelper::createDescriptorSet(
        dld,
        device,
        *this->m_layout.layout,
        3, 1, numSampledDescriptors, numStorageDescriptors
    );
    this->m_descriptorSet.pool = std::move(descriptorPool);
    this->m_descriptorSet.set = descriptorSet;

    // Timestamp and iteration start out zeroed
    const UniformBuffer initialUniforms{
        .advancedColorKind = hdr ? 2U : 0U,
        .hdrSupport = hdr ? 1U : 0U,
        .resolutionInvScale = 1.0F / flow,
        .uiThreshold = 0.5F
    };
    this->m_descriptorSet.buffer = vkhelper::createBuffer(
        dld,
        device,
        physdev,
        initialUniforms
    );

    auto* mapped{static_cast<UniformBuffer*>(
        device.mapMemory(
            *this->m_descriptorSet.buffer.second,
            0,
            VK_WHOLE_SIZE,
            {},
            dld
        )
    )};
    // The custom deleter unmaps the memory once the last owner is gone
    this->m_descriptorSet.mappedBuffer = std::shared_ptr<UniformBuffer*>(
        new UniformBuffer*{mapped},
        [device, memory = *this->m_descriptorSet.buffer.second, dld](auto* ptr) {
            device.unmapMemory(memory, dld);
            delete ptr; // NOLINT (manual memory management)
        }
    );

    this->m_descriptorSet.samplers.at(0) = vkhelper::createSampler(
        dld,
        device,
        vk::SamplerAddressMode::eClampToBorder,
        vk::CompareOp::eNever,
        false
    );
    this->m_descriptorSet.samplers.at(1) = vkhelper::createSampler(
        dld,
        device,
        vk::SamplerAddressMode::eClampToBorder,
        vk::CompareOp::eNever,
        true
    );
    this->m_descriptorSet.samplers.at(2) = vkhelper::createSampler(
        dld,
        device,
        vk::SamplerAddressMode::eClampToEdge,
        vk::CompareOp::eAlways,
        false
    );

    // Update descriptor set bindings
    std::vector<vk::WriteDescriptorSet> writes(4 + signature.descriptors.size());

    const vk::DescriptorBufferInfo uniformInfo{
        .buffer = *this->m_descriptorSet.buffer.first,
        .range = VK_WHOLE_SIZE
    };
    writes.at(0) = {
        .dstSet = this->m_descriptorSet.set,
        .dstBinding = 0,
        .descriptorCount = 1,
        .descriptorType = vk::DescriptorType::eUniformBuffer,
        .pBufferInfo = &uniformInfo
    };

    std::array<vk::DescriptorImageInfo, 3> samplerInfos;
    for (uint32_t s = 0; s < 3; s++) {
        samplerInfos.at(s) = {
            .sampler = *this->m_descriptorSet.samplers.at(s)
        };
        writes.at(1 + s) = {
            .dstSet = this->m_descriptorSet.set,
            .dstBinding = 1 + s,
            .descriptorCount = 1,
            .descriptorType = vk::DescriptorType::eSampler,
            .pImageInfo = &samplerInfos.at(s)
        };
    }

    // One info list per image binding; must stay alive until the update call
    std::vector<std::vector<vk::DescriptorImageInfo>> imageInfosPerBinding(
        signature.descriptors.size());
    for (size_t d = 0; d < signature.descriptors.size(); d++) {
        const auto& descriptor{signature.descriptors.at(d)};

        auto& infos{imageInfosPerBinding.at(d)};
        infos.reserve(descriptor.resources.size());

        for (const auto& resourceIdx : descriptor.resources) {
            const auto& image{this->m_images.at(resourceIdx)};

            for (const auto& subimage : image.subimages) {
                infos.push_back({
                    .imageView = *subimage.view,
                    .imageLayout = vk::ImageLayout::eGeneral
                });
            }
        }

        writes.at(4 + d) = {
            .dstSet = this->m_descriptorSet.set,
            .dstBinding = static_cast<uint32_t>(4 + d),
            .descriptorCount = static_cast<uint32_t>(infos.size()),
            .descriptorType = descriptor.type == BindingType::StorageImage ?
                vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage,
            .pImageInfo = infos.data()
        };
    }

    device.updateDescriptorSets(writes, {}, dld);

    // Build all shader pipelines
    std::vector<vk::ComputePipelineCreateInfo> createInfos;
    for (const auto& [shaderName, hasVariants] : signature.shaders) {
        // Shaders with variants carry a bit-depth suffix
        std::string resolvedName{shaderName};
        if (hasVariants)
            resolvedName += hdr ? "_16bit" : "_8bit";

        const auto& shaderModule{library.shader(resolvedName, perf)};

        createInfos.push_back({
            .stage = {
                .stage = vk::ShaderStageFlagBits::eCompute,
                .module = *shaderModule,
                .pName = "main"
            },
            .layout = *this->m_layout.pipelineLayout
        });
    }

    const std::string_view cacheTag{perf ? "performance" : "quality"};
    auto [pipelineCache, isCacheValid] = vkhelper::createPipelineCache(
        dld,
        device,
        physdev,
        cacheTag
    );
    this->m_cache = std::move(pipelineCache);

    std::vector<vk::UniquePipeline> builtPipelines{
        device.createComputePipelinesUnique(
            *this->m_cache,
            createInfos,
            nullptr,
            dld
        ).value
    };

    // Write the cache back when the loaded one was not valid
    if (!isCacheValid) {
        vkhelper::persistPipelineCache(
            dld,
            device,
            physdev,
            *this->m_cache,
            cacheTag
        );
    }

    this->m_pipelines.reserve(signature.shaders.size());
    for (size_t p = 0; p < signature.shaders.size(); p++) {
        this->m_pipelines.emplace(
            signature.shaders.at(p).first,
            std::move(builtPipelines.at(p))
        );
    }

    // Build pipeline stages
    std::unordered_map<std::string_view, uint32_t> iterationCounters;
    for (const auto& stageSignature : signature.stages) {
        auto& stage{this->m_stages.emplace_back()};
        stage.substages.emplace_back();

        for (const auto& passIdx : stageSignature.passes) { // (Sorted by shader)
            const auto& pass{signature.passes.at(passIdx)};

            // Only resources that resolve to an image index are tracked
            for (const auto& resource : pass.inputs) {
                if (const auto idx{resource.idx()}; idx)
                    stage.sampledImages.push_back(*idx);
            }
            for (const auto& resource : pass.outputs) {
                if (const auto idx{resource.idx()}; idx)
                    stage.storageImages.push_back(*idx);
            }

            // Start a new sub-stage whenever the shader changes
            const auto& currentShader{stage.substages.back().pipeline};
            const bool shaderChanged{!currentShader.empty() && currentShader != pass.shader};
            if (shaderChanged)
                stage.substages.emplace_back();

            auto& substage{stage.substages.back()};
            substage.pipeline = pass.shader;
            substage.subiterations.push_back({
                .iterationIndex = iterationCounters[substage.pipeline]++,
                .dispatch = apply(extent, flowExtent, pass.dispatchOp),
                .isSpecial = pass.flags & PassFlag::Special
            });
        }
    }

    // Transition all images into general layout
    this->m_pool = vkhelper::createCommandPool(
        dld,
        device,
        queueFamilyIndex
    );

    std::vector<vk::ImageMemoryBarrier2KHR> layoutBarriers;
    for (const auto& image : this->m_images) {
        for (const auto& subimage : image.subimages) {
            layoutBarriers.push_back({
                .newLayout = vk::ImageLayout::eGeneral,
                .image = *subimage.image,
                .subresourceRange = colorRange(image)
            });
        }
    }

    const auto layoutCmdbuf{
        vkhelper::createCommandBuffer(dld, device, *this->m_pool)
    };

    layoutCmdbuf->begin({ .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit }, dld);
    layoutCmdbuf->pipelineBarrier2KHR({
        .imageMemoryBarrierCount = static_cast<uint32_t>(layoutBarriers.size()),
        .pImageMemoryBarriers = layoutBarriers.data()
    }, dld);
    layoutCmdbuf->end(dld);

    const auto layoutFence{device.createFenceUnique({}, nullptr, dld)};
    queue.submit(
        {{
            .commandBufferCount = 1,
            .pCommandBuffers = &*layoutCmdbuf
        }},
        *layoutFence,
        dld
    );
    // The timeout is given in nanoseconds by the Vulkan API
    if (device.waitForFences(*layoutFence, VK_TRUE, 50'000'000, dld) != vk::Result::eSuccess) {
        throw std::runtime_error("Failed to wait for image layout transition fence");
    }

    // One command buffer per split, plus one
    for (size_t c = 0; c < signature.splitIndices.size() + 1; c++) {
        auto& cmdbuf{this->m_cmdbufs.emplace_back(
            vkhelper::createCommandBuffer(dld, device, *this->m_pool)
        )};
        cmdbuf->begin({ .flags = vk::CommandBufferUsageFlagBits::eSimultaneousUse }, dld);

        cmdbuf->bindDescriptorSets(
            vk::PipelineBindPoint::eCompute,
            *this->m_layout.pipelineLayout,
            0,
            this->m_descriptorSet.set,
            {},
            dld
        );
    }

    size_t cmdbufIdx{0};
    size_t nextSplit{
        signature.splitIndices.empty() ? signature.passes.size() : signature.splitIndices.front()
    };

    std::vector<vk::ImageMemoryBarrier2KHR> flushedBarriers;
    flushedBarriers.reserve(16);

    // Barriers waiting to be emitted before the next stage, keyed by image
    std::unordered_map<VkImage, vk::ImageMemoryBarrier2KHR> pendingBarriers;

    // Make sure each listed image has a pending barrier ending in `access`
    const auto requireAccess = [&](
            const std::vector<size_t>& imageIndices,
            vk::AccessFlagBits2 access) {
        for (const auto& idx : imageIndices) {
            const auto& image = this->m_images.at(idx);
            for (const auto& subimage : image.subimages) {
                auto handle{static_cast<const VkImage>(*subimage.image)}; // NOLINT (32-bit)

                const auto pending{pendingBarriers.find(handle)};
                if (pending != pendingBarriers.end()) {
                    pending->second.dstAccessMask = access;
                    continue;
                }

                pendingBarriers[handle] = makeBarrier(
                    image, subimage, vk::AccessFlagBits2::eNone, access);
            }
        }
    };

    // Queue a barrier from this stage's `access` towards a later shader read
    const auto carryOver = [&](
            const std::vector<size_t>& imageIndices,
            vk::AccessFlagBits2 access) {
        for (const auto& idx : imageIndices) {
            const auto& image = this->m_images.at(idx);
            for (const auto& subimage : image.subimages) {
                pendingBarriers[static_cast<VkImage>(*subimage.image)] = makeBarrier(
                    image, subimage, access, vk::AccessFlagBits2::eShaderRead);
            }
        }
    };

    for (size_t stageIdx = 0; stageIdx < this->m_stages.size(); stageIdx++) {
        // Move on to the next command buffer when a split index is reached
        if (stageIdx == nextSplit) {
            cmdbufIdx++;
            nextSplit = cmdbufIdx < signature.splitIndices.size() ?
                signature.splitIndices.at(cmdbufIdx) : signature.passes.size();
        }

        const auto& stage{this->m_stages.at(stageIdx)};
        const auto& cmdbuf{this->m_cmdbufs.at(cmdbufIdx)};

        // Append barriers for this stage
        requireAccess(stage.sampledImages, vk::AccessFlagBits2::eShaderRead);
        requireAccess(stage.storageImages, vk::AccessFlagBits2::eShaderWrite);

        flushedBarriers.clear();
        for (const auto& [_, barrier] : pendingBarriers) // NOLINT (nondeterministic order)
            flushedBarriers.push_back(barrier);
        pendingBarriers.clear();
        cmdbuf->pipelineBarrier2KHR({
            .imageMemoryBarrierCount = static_cast<uint32_t>(flushedBarriers.size()),
            .pImageMemoryBarriers = flushedBarriers.data()
        }, dld);

        for (const auto& substage : stage.substages) {
            // Bind shader pipeline for this stage
            const auto& pipeline = this->m_pipelines.at(substage.pipeline);
            cmdbuf->bindPipeline(vk::PipelineBindPoint::eCompute, *pipeline, dld);

            // Dispatch all subiterations for this stage
            for (const auto& subiteration : substage.subiterations) {
                const PushConstants pushConstants{
                    .specialFlag = subiteration.isSpecial ? 1U : 0U,
                    .subiteration = subiteration.iterationIndex
                };
                cmdbuf->pushConstants(
                    *this->m_layout.pipelineLayout,
                    vk::ShaderStageFlagBits::eCompute,
                    0,
                    sizeof(PushConstants),
                    &pushConstants,
                    dld
                );

                const auto& dispatch{subiteration.dispatch};
                cmdbuf->dispatch(dispatch.width, dispatch.height, 1, dld);
            }
        }

        // Append barriers for next stage
        carryOver(stage.sampledImages, vk::AccessFlagBits2::eShaderRead);
        carryOver(stage.storageImages, vk::AccessFlagBits2::eShaderWrite);

        // Skip barriers on switch between passes
        if (stageIdx + 1 == nextSplit) {
            pendingBarriers.clear();
        }
    }

    for (auto& cmdbuf : this->m_cmdbufs) {
        cmdbuf->end(dld);
    }
}
|
||||
|
||||
///
/// Build a transition command buffer
///
/// The recorded commands overwrite the first four bytes of the uniform buffer
/// (the timestamp) with (index + 1) / (total + 1), fenced on both sides by a
/// buffer barrier against compute shader uniform reads.
///
/// Command buffers are stored in a map owned by the pipeline. When `total` is
/// greater than 8 the entry is keyed by (index, total) and reused on later
/// calls; otherwise the entry keyed by `index` is re-recorded on every call.
///
/// @return Non-owning handle to the command buffer
///
vk::CommandBuffer Pipeline::buildTransCmdbuf(
        const vk::detail::DispatchLoaderDynamic& dld,
        const vk::Device& device,
        uint32_t iteration,
        uint32_t index,
        uint32_t total
) {
    const bool persist{total > 8};

    uint64_t key{index};
    if (persist)
        key = (static_cast<uint64_t>(index) << 32) | total;

    // Reuse a previously recorded buffer when caching applies
    if (persist) {
        const auto cached{this->m_transCmdbufs.find(key)};
        if (cached != this->m_transCmdbufs.end())
            return *cached->second;
    }

    auto& cmdbuf{this->m_transCmdbufs[key]};
    cmdbuf = vkhelper::createCommandBuffer(
        dld,
        device,
        *this->m_pool
    );

    const auto usage{
        persist ? vk::CommandBufferUsageFlagBits::eSimultaneousUse :
            vk::CommandBufferUsageFlagBits::eOneTimeSubmit
    };
    cmdbuf->begin({ .flags = usage }, dld);

    // Records a barrier over the first four bytes of the uniform buffer
    const auto recordBarrier = [&](
            vk::PipelineStageFlagBits2 srcStage,
            vk::AccessFlagBits2 srcAccess,
            vk::PipelineStageFlagBits2 dstStage,
            vk::AccessFlagBits2 dstAccess) {
        const vk::BufferMemoryBarrier2KHR barrier{
            .srcStageMask = srcStage,
            .srcAccessMask = srcAccess,
            .dstStageMask = dstStage,
            .dstAccessMask = dstAccess,
            .buffer = *this->m_descriptorSet.buffer.first,
            .size = 4
        };
        cmdbuf->pipelineBarrier2KHR({
            .bufferMemoryBarrierCount = 1,
            .pBufferMemoryBarriers = &barrier
        }, dld);
    };

    // Earlier uniform reads must finish before the transfer write
    recordBarrier(
        vk::PipelineStageFlagBits2::eComputeShader, vk::AccessFlagBits2::eUniformRead,
        vk::PipelineStageFlagBits2::eTransfer, vk::AccessFlagBits2::eTransferWrite
    );

    // Only the timestamp (the first four bytes) is uploaded
    const UniformBuffer buf{
        .timestamp = static_cast<float>(index + 1) / static_cast<float>(total + 1),
        .iteration = iteration
    };
    cmdbuf->updateBuffer(
        *this->m_descriptorSet.buffer.first,
        0,
        4,
        static_cast<const void*>(&buf.timestamp),
        dld
    );

    // The transfer write must be visible to later uniform reads
    recordBarrier(
        vk::PipelineStageFlagBits2::eTransfer, vk::AccessFlagBits2::eTransferWrite,
        vk::PipelineStageFlagBits2::eComputeShader, vk::AccessFlagBits2::eUniformRead
    );

    cmdbuf->end(dld);

    return *cmdbuf;
}
|
||||
225
lsfg-vk-backend/src/modules/pipeline.hpp
Normal file
225
lsfg-vk-backend/src/modules/pipeline.hpp
Normal file
|
|
@ -0,0 +1,225 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "library.hpp"
|
||||
#include "pipeline/signature.hpp"
|
||||
#include "pipeline/signature/image.hpp"
|
||||
#include "utility/vkhelper.hpp"
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace lsfgvk::pipeline {
|
||||
|
||||
/// Handle to an external image
|
||||
struct ExternalImage {
|
||||
/// Image Extent
|
||||
vk::Extent2D extent;
|
||||
/// Image Format
|
||||
vk::Format format;
|
||||
/// Amount of layers in image
|
||||
uint32_t layers;
|
||||
|
||||
/// Handle to the Vulkan image (not owned)
|
||||
vk::Image image;
|
||||
/// Handle to the Vulkan memory (not owned)
|
||||
vk::DeviceMemory memory;
|
||||
};
|
||||
|
||||
/// Contents of the uniform buffer, in the order they are laid out in memory
struct UniformBuffer {
    /// First field; the only part rewritten by transition command buffers
    float timestamp;
    /// Iteration value
    uint32_t iteration;
    /// Set to 2 when HDR is enabled, 0 otherwise
    uint32_t advancedColorKind;
    /// Set to 1 when HDR is enabled, 0 otherwise
    uint32_t hdrSupport;
    /// Reciprocal of the flow scale
    float resolutionInvScale;
    /// UI threshold
    float uiThreshold;
};
|
||||
|
||||
/// Push constant block handed to every dispatch
struct PushConstants {
    /// 1 when the sub-iteration is marked special, 0 otherwise
    uint32_t specialFlag;
    /// Index of the sub-iteration being dispatched
    uint32_t subiteration;
};
|
||||
|
||||
///
|
||||
/// Vulkan pipeline created from a signature
|
||||
///
|
||||
class Pipeline {
|
||||
public:
|
||||
///
|
||||
/// Create a new pipeline
|
||||
///
|
||||
/// @param dld Vulkan dispatch loader
|
||||
/// @param device Vulkan device
|
||||
/// @param physdev Vulkan physical device
|
||||
/// @param queue Vulkan compute queue
|
||||
/// @param queueFamilyIndex Compute queue family index
|
||||
/// @param library Shader library
|
||||
/// @param signature Pipeline signature
|
||||
/// @param extent Base extent
|
||||
/// @param flow Flow scale
|
||||
/// @param perf Performance mode
|
||||
/// @param hdr HDR variant
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
explicit Pipeline(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const vk::PhysicalDevice& physdev,
|
||||
const vk::Queue& queue,
|
||||
uint32_t queueFamilyIndex,
|
||||
const library::ShaderLibrary& library,
|
||||
const PipelineSignature& signature,
|
||||
vk::Extent2D extent,
|
||||
float flow,
|
||||
bool perf,
|
||||
bool hdr
|
||||
);
|
||||
|
||||
///
|
||||
/// Get all external input images
|
||||
///
|
||||
/// @return List of images
|
||||
///
|
||||
[[nodiscard]] auto& getExternalInputs() const {
|
||||
return this->m_externalInputs;
|
||||
}
|
||||
|
||||
/// Get all external output images
|
||||
[[nodiscard]] auto& getExternalOutputs() const {
|
||||
return this->m_externalOutputs;
|
||||
}
|
||||
|
||||
///
|
||||
/// Get the mapped uniform buffer
|
||||
///
|
||||
/// @return Mapped uniform buffer
|
||||
///
|
||||
[[nodiscard]] auto* getMappedBuffer() const {
|
||||
return *this->m_descriptorSet.mappedBuffer.get();
|
||||
}
|
||||
|
||||
///
|
||||
/// Get all command buffers
|
||||
///
|
||||
/// @return List of command buffers
|
||||
///
|
||||
[[nodiscard]] auto& getCmdbufs() const {
|
||||
return this->m_cmdbufs;
|
||||
}
|
||||
|
||||
///
|
||||
/// Build a transition command buffer
|
||||
///
|
||||
/// @param dld Vulkan dispatch loader
|
||||
/// @param device Vulkan device
|
||||
/// @param iteration Current iteration
|
||||
/// @param index Index of the iteration
|
||||
/// @param total Total iterations
|
||||
/// @return Command buffer handle
|
||||
///
|
||||
vk::CommandBuffer buildTransCmdbuf(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
uint32_t iteration,
|
||||
uint32_t index,
|
||||
uint32_t total
|
||||
);
|
||||
|
||||
private:
|
||||
/// Vulkan descriptor set & pipeline layout
|
||||
struct Layout {
|
||||
vk::UniqueDescriptorSetLayout layout;
|
||||
vk::UniquePipelineLayout pipelineLayout;
|
||||
};
|
||||
Layout m_layout;
|
||||
|
||||
/// Sub-image of a Vulkan image
|
||||
struct SubImage {
|
||||
vk::UniqueImage image;
|
||||
vk::MemoryRequirements memory;
|
||||
vk::UniqueImageView view;
|
||||
};
|
||||
|
||||
/// Vulkan image created from an ImageSignature
|
||||
struct Image {
|
||||
ImageSignature signature;
|
||||
std::vector<SubImage> subimages;
|
||||
vk::DeviceSize size{};
|
||||
};
|
||||
std::vector<Image> m_images;
|
||||
|
||||
std::vector<ExternalImage> m_externalInputs;
|
||||
std::vector<ExternalImage> m_externalOutputs;
|
||||
|
||||
/// Memory allocation sub-segment
|
||||
struct MemorySubSegment {
|
||||
vk::DeviceSize size{};
|
||||
vk::DeviceSize offset{}; // Offset in memory segment
|
||||
};
|
||||
|
||||
/// Memory allocation segment
|
||||
struct MemorySegment {
|
||||
size_t imageIdx{};
|
||||
std::vector<MemorySubSegment> subsegments;
|
||||
vk::DeviceSize size{};
|
||||
vk::DeviceSize offset{}; // Offset in allocation
|
||||
};
|
||||
|
||||
/// Memory allocation info
|
||||
struct AllocationInfo {
|
||||
vk::UniqueDeviceMemory memory;
|
||||
std::vector<MemorySegment> segments;
|
||||
vk::DeviceSize size{};
|
||||
};
|
||||
std::array<AllocationInfo, 2> m_allocations;
|
||||
std::unordered_map<size_t, vk::UniqueDeviceMemory> m_externalAllocations;
|
||||
|
||||
/// Vulkan descriptor set
|
||||
struct DescriptorSet {
|
||||
vk::UniqueDescriptorPool pool;
|
||||
vk::DescriptorSet set; // Can not be freed
|
||||
std::pair<vk::UniqueBuffer, vk::UniqueDeviceMemory> buffer;
|
||||
std::shared_ptr<UniformBuffer*> mappedBuffer;
|
||||
std::array<vk::UniqueSampler, 3> samplers;
|
||||
};
|
||||
DescriptorSet m_descriptorSet;
|
||||
|
||||
vk::UniquePipelineCache m_cache;
|
||||
std::unordered_map<std::string_view, vk::UniquePipeline> m_pipelines;
|
||||
|
||||
/// Single iteration of a sub-stage
|
||||
struct SubIteration {
|
||||
uint32_t iterationIndex{};
|
||||
vk::Extent2D dispatch;
|
||||
bool isSpecial{};
|
||||
};
|
||||
|
||||
/// Sub-stage of an execution stage
|
||||
struct SubStage {
|
||||
std::string_view pipeline;
|
||||
std::vector<SubIteration> subiterations;
|
||||
};
|
||||
|
||||
/// Execution stage
|
||||
struct Stage {
|
||||
std::vector<SubStage> substages;
|
||||
std::vector<size_t> sampledImages;
|
||||
std::vector<size_t> storageImages;
|
||||
};
|
||||
std::vector<Stage> m_stages;
|
||||
|
||||
vk::UniqueCommandPool m_pool;
|
||||
std::vector<vk::UniqueCommandBuffer> m_cmdbufs;
|
||||
std::unordered_map<uint64_t, vk::UniqueCommandBuffer> m_transCmdbufs;
|
||||
};
|
||||
|
||||
}
|
||||
340
lsfg-vk-backend/src/modules/pipeline/signature.hpp
Normal file
340
lsfg-vk-backend/src/modules/pipeline/signature.hpp
Normal file
|
|
@ -0,0 +1,340 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "signature/helpers.hpp"
|
||||
#include "signature/image.hpp"
|
||||
#include "signature/pass.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <iterator>
|
||||
#include <numeric>
|
||||
#include <optional>
|
||||
#include <ranges>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace lsfgvk::pipeline {
|
||||
|
||||
/// Kind of a binding inside the descriptor set
enum class BindingType : uint8_t {
    /// Binding refers to sampled images
    SampledImage = 0,
    /// Binding refers to storage images
    StorageImage = 1
};
|
||||
|
||||
/// Signature of a descriptor set binding
|
||||
struct BindingSignature {
|
||||
/// Type of binding
|
||||
BindingType type{ BindingType::SampledImage };
|
||||
/// Resources attached to binding
|
||||
inplace_vector<size_t, 16> resources;
|
||||
};
|
||||
|
||||
/// Signature of a pipeline stage
|
||||
struct StageSignature {
|
||||
/// Passes executed this stage
|
||||
inplace_vector<size_t, 8> passes;
|
||||
};
|
||||
|
||||
///
|
||||
/// Signature of a compute pipeline
|
||||
///
|
||||
struct PipelineSignature {
|
||||
/// Shader names used by the pipeline (and if there are hdr variants)
|
||||
inplace_vector<std::pair<std::string_view, bool>, 32> shaders;
|
||||
/// Images used by the pipeline
|
||||
inplace_vector<ImageSignature, 192> images;
|
||||
/// Ordered set of bindings for the descriptor set
|
||||
inplace_vector<BindingSignature, 192> descriptors;
|
||||
/// Indexable list of all passes
|
||||
inplace_vector<PassSignature, 100> passes;
|
||||
/// Ordered list of stages, excecuted in sequence
|
||||
inplace_vector<StageSignature, 100> stages;
|
||||
/// Stage index where the command buffers are split
|
||||
inplace_vector<size_t, 4> splitIndices;
|
||||
};
|
||||
|
||||
///
|
||||
/// The signature of a compute pipeline
|
||||
///
|
||||
class PipelineSignatureBuilder {
|
||||
public:
|
||||
///
|
||||
/// Create a new empty signature builder
|
||||
///
|
||||
explicit PipelineSignatureBuilder() = default;
|
||||
|
||||
///
|
||||
/// Register an image
|
||||
///
|
||||
/// @param image Image signature
|
||||
/// @return Handle to the image
|
||||
///
|
||||
consteval size_t registerImage(ImageSignature image) {
|
||||
this->m_images.push_back(std::move(image));
|
||||
return this->m_images.size() - 1;
|
||||
}
|
||||
|
||||
///
|
||||
/// Append a pass
|
||||
///
|
||||
/// @param pass Pass signature
|
||||
/// @return Handle to the pass
|
||||
///
|
||||
consteval size_t appendPass(PassSignature pass) {
|
||||
this->m_passes.push_back(std::move(pass));
|
||||
return this->m_passes.size() - 1;
|
||||
}
|
||||
|
||||
///
|
||||
/// Split the command buffer
|
||||
///
|
||||
consteval void split() {
|
||||
this->m_splitIndices.emplace_back(this->m_passes.size());
|
||||
}
|
||||
|
||||
///
|
||||
/// Compute a pipeline signature
|
||||
///
|
||||
/// @throws const char* on failure
|
||||
/// @return Pipeline siganture
|
||||
///
|
||||
consteval PipelineSignature finalize() {
|
||||
PipelineSignature s{};
|
||||
|
||||
struct ShaderInfo {
|
||||
std::string_view id;
|
||||
bool hasHdrVariant{};
|
||||
size_t sampledImageBindings{}; // Only the amount suffices here
|
||||
std::vector<std::vector<size_t>> storageImageBindings;
|
||||
};
|
||||
std::vector<ShaderInfo> shaderInfos;
|
||||
|
||||
// Populate shader map with empty bindings
|
||||
for (const auto& pass : this->m_passes) {
|
||||
const auto it{std::ranges::find_if(shaderInfos, [&pass](const auto& shader) {
|
||||
return shader.id == pass.shader;
|
||||
})};
|
||||
const bool firstOccurrence{it == shaderInfos.end()};
|
||||
const bool isAggregatePass{pass.flags & PassFlag::Aggregate};
|
||||
|
||||
auto& shader{firstOccurrence ? shaderInfos.emplace_back() : *it};
|
||||
|
||||
if (firstOccurrence) {
|
||||
shader.id = pass.shader;
|
||||
shader.hasHdrVariant = pass.flags & PassFlag::HdrVariant;
|
||||
shader.sampledImageBindings = pass.inputs.size();
|
||||
shader.storageImageBindings.resize(pass.outputs.size());
|
||||
}
|
||||
|
||||
// Ensure consistent usage aross invocations
|
||||
if (!firstOccurrence && !isAggregatePass)
|
||||
throw "Shader \"" + std::string(pass.shader) + "\" is used by "
|
||||
"multiple passes but does not have the Aggregate flag set";
|
||||
|
||||
if (shader.sampledImageBindings != pass.inputs.size())
|
||||
throw "Shader \"" + std::string(pass.shader) + "\" has "
|
||||
"inconsistent read counts across passes";
|
||||
if (shader.storageImageBindings.size() != pass.outputs.size())
|
||||
throw "Shader \"" + std::string(pass.shader) + "\" has "
|
||||
"inconsistent write counts across passes";
|
||||
|
||||
// Collect all used resources written by this shader
|
||||
for (size_t i = 0; i < pass.outputs.size(); i++) {
|
||||
const auto& resource{pass.outputs.at(i)};
|
||||
if (!resource.idx())
|
||||
continue;
|
||||
|
||||
const auto& image{this->m_images.at(*resource.idx())};
|
||||
if (isAggregatePass && (image.flags & ImageFlag::Mipmaps) && !resource.layer())
|
||||
throw "Pass \"" + std::string(pass.shader) + "\" has "
|
||||
"Aggregate flag but fully writes to an image with Mipmaps flag";
|
||||
|
||||
shader.storageImageBindings.at(i).push_back(*resource.idx());
|
||||
}
|
||||
}
|
||||
|
||||
// Create descriptors for all resources
|
||||
for (size_t i = 0; i < this->m_images.size(); i++) {
|
||||
const auto& image{this->m_images.at(i)};
|
||||
if (image.flags & ImageFlag::ExternalInput) {
|
||||
s.descriptors.push_back({
|
||||
.type = BindingType::SampledImage,
|
||||
.resources = { i }
|
||||
});
|
||||
}
|
||||
}
|
||||
for (const auto& shader : shaderInfos) {
|
||||
for (const auto& resources : shader.storageImageBindings) {
|
||||
s.descriptors.push_back({
|
||||
.type = BindingType::StorageImage,
|
||||
.resources = resources
|
||||
});
|
||||
|
||||
// Skip sampled image bindings for external outputs
|
||||
const auto& image{this->m_images.at(resources.front())};
|
||||
if (image.flags & ImageFlag::ExternalOutput)
|
||||
continue;
|
||||
|
||||
s.descriptors.push_back({
|
||||
.type = BindingType::SampledImage,
|
||||
.resources = resources
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate pipeline stages by reordering passes with dependencies as constraints
|
||||
std::vector<size_t> writtenImages;
|
||||
for (size_t i = 0; i < this->m_images.size(); i++) {
|
||||
const auto& image{this->m_images.at(i)};
|
||||
if (image.flags & ImageFlag::ExternalInput)
|
||||
writtenImages.push_back(i);
|
||||
}
|
||||
|
||||
std::vector<size_t> remainingPasses(this->m_passes.size());
|
||||
std::iota(remainingPasses.begin(), remainingPasses.end(), 0);
|
||||
|
||||
size_t currentStageIndex{0};
|
||||
std::pair<size_t, size_t> currentStageBounds{
|
||||
0,
|
||||
this->m_splitIndices.empty() ? this->m_passes.size() : this->m_splitIndices.front()
|
||||
};
|
||||
|
||||
while (!remainingPasses.empty()) {
|
||||
auto& currentStage{s.stages.emplace_back()};
|
||||
|
||||
// Find all passes that may be executed next
|
||||
std::vector<size_t> validPasses{};
|
||||
for (const auto& passIdx : remainingPasses) {
|
||||
if (passIdx < currentStageBounds.first || passIdx >= currentStageBounds.second)
|
||||
continue; // Skip passes that are not in the current stage
|
||||
|
||||
const auto& pass{this->m_passes.at(passIdx)};
|
||||
|
||||
bool isValid{true};
|
||||
for (const auto& image : pass.inputs) {
|
||||
if (!image.idx())
|
||||
continue;
|
||||
if (std::ranges::find(writtenImages, *image.idx()) != writtenImages.end())
|
||||
continue;
|
||||
|
||||
isValid = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!isValid)
|
||||
continue;
|
||||
|
||||
validPasses.push_back(passIdx);
|
||||
}
|
||||
|
||||
// If no valid pass exists in the current stage, move on to the next stage
|
||||
if (validPasses.empty() && currentStageIndex < this->m_splitIndices.size()) {
|
||||
currentStageIndex++;
|
||||
currentStageBounds = {
|
||||
currentStageBounds.second,
|
||||
currentStageIndex < this->m_splitIndices.size() ?
|
||||
this->m_splitIndices.at(currentStageIndex) : this->m_passes.size()
|
||||
};
|
||||
|
||||
s.stages.pop_back();
|
||||
s.splitIndices.emplace_back(s.stages.size());
|
||||
continue;
|
||||
}
|
||||
|
||||
// Sort valid passes by shader name
|
||||
auto begin = std::ranges::begin(validPasses);
|
||||
auto end = std::ranges::end(validPasses);
|
||||
for (auto i = begin; i != end; i++) {
|
||||
std::rotate(
|
||||
std::upper_bound(begin, i, *i, [this](size_t a, size_t b) {
|
||||
return this->m_passes.at(a).shader < this->m_passes.at(b).shader;
|
||||
}),
|
||||
i, std::next(i)
|
||||
);
|
||||
}
|
||||
|
||||
// Merge passes into execution step
|
||||
for (const auto& passIdx : validPasses) {
|
||||
const auto& pass{this->m_passes.at(passIdx)};
|
||||
|
||||
for (const auto& resource : pass.outputs) {
|
||||
if (!resource.idx())
|
||||
continue;
|
||||
writtenImages.push_back(*resource.idx());
|
||||
}
|
||||
|
||||
currentStage.passes.push_back(passIdx);
|
||||
remainingPasses.erase(std::ranges::find(remainingPasses, passIdx));
|
||||
}
|
||||
}
|
||||
|
||||
// Calculate usage timeline for each image
|
||||
for (size_t i = 0; i < this->m_images.size(); i++) {
|
||||
auto& image{this->m_images.at(i)};
|
||||
if (image.flags & ImageFlag::Pinned)
|
||||
continue;
|
||||
|
||||
std::optional<size_t> writeIndex;
|
||||
std::optional<size_t> readIndex;
|
||||
|
||||
// Find the first stage that writes to the image and last stage that reads from it
|
||||
for (size_t j = 0; j < s.stages.size(); j++) {
|
||||
const auto& stage{s.stages.at(j)};
|
||||
|
||||
for (const auto& passIdx : stage.passes) {
|
||||
const auto& pass{this->m_passes.at(passIdx)};
|
||||
|
||||
const bool isRead{
|
||||
std::ranges::any_of(pass.inputs, [i](const auto& resource) {
|
||||
return resource.idx() && *resource.idx() == i;
|
||||
})
|
||||
};
|
||||
const bool isWritten{
|
||||
std::ranges::any_of(pass.outputs, [i](const auto& resource) {
|
||||
return resource.idx() && *resource.idx() == i;
|
||||
})
|
||||
};
|
||||
|
||||
if (writeIndex && isWritten)
|
||||
throw "Image " + std::to_string(i) +
|
||||
" is written by multiple passes";
|
||||
if (isWritten && isRead)
|
||||
throw "Image " + std::to_string(i) +
|
||||
" is read & write in the same pass";
|
||||
|
||||
if (isWritten)
|
||||
writeIndex.emplace(j);
|
||||
if (isRead)
|
||||
readIndex.emplace(std::max(readIndex.value_or(0), j));
|
||||
}
|
||||
}
|
||||
|
||||
if (!writeIndex)
|
||||
throw "Image " + std::to_string(i) + " is not written to by any pass";
|
||||
if (!readIndex)
|
||||
throw "Image " + std::to_string(i) + " is not read from by any pass";
|
||||
|
||||
image.lifetime = { *writeIndex, *readIndex };
|
||||
}
|
||||
|
||||
// Copy remaining resources into signature
|
||||
for (const auto& shader : shaderInfos)
|
||||
s.shaders.emplace_back(shader.id, shader.hasHdrVariant);
|
||||
for (const auto& image : this->m_images)
|
||||
s.images.push_back(image);
|
||||
for (const auto& pass : this->m_passes)
|
||||
s.passes.push_back(pass);
|
||||
return s;
|
||||
}
|
||||
private:
|
||||
std::vector<ImageSignature> m_images;
|
||||
std::vector<PassSignature> m_passes;
|
||||
std::vector<size_t> m_splitIndices;
|
||||
};
|
||||
|
||||
}
|
||||
128
lsfg-vk-backend/src/modules/pipeline/signature/helpers.hpp
Normal file
128
lsfg-vk-backend/src/modules/pipeline/signature/helpers.hpp
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <initializer_list>
|
||||
#include <new>
|
||||
#include <stdexcept>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace lsfgvk::pipeline {
|
||||
|
||||
/// Minimal backport of the C++26 inplace_vector: a vector with a fixed
/// capacity of N elements stored inside the object itself.
///
/// All N slots are value-initialised up front and elements are assigned into
/// them, so T has to be default constructible and assignable. Removing
/// elements only lowers the size, the slot contents are left untouched.
/// Exceeding the capacity throws std::bad_alloc, invalid element access
/// throws std::out_of_range.
template<typename T, size_t N>
class inplace_vector {
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
public:
    // Constructors

    /// Create an empty vector
    constexpr inplace_vector() = default;
    /// Create from an initializer list
    /// @throws std::bad_alloc if the list holds more than N elements
    constexpr inplace_vector(std::initializer_list<T> init) {
        if (init.size() > N) throw std::bad_alloc();
        for (auto& elem : init)
            this->m_data.at(this->m_size++) = elem;
    }
    /// Create from a std::vector (intentionally implicit)
    /// @throws std::bad_alloc if the vector holds more than N elements
    constexpr inplace_vector(const std::vector<T>& vec) {
        if (vec.size() > N) throw std::bad_alloc();
        for (const auto& elem : vec)
            this->m_data.at(this->m_size++) = elem;
    }

    // Appending elements

    /// Append a copy of value
    /// @throws std::bad_alloc if the vector is full
    constexpr void push_back(const T& value) {
        if (this->m_size >= N) throw std::bad_alloc();
        this->m_data.at(this->m_size++) = value;
    }
    /// Append value by moving it
    /// @throws std::bad_alloc if the vector is full
    constexpr void push_back(T&& value) {
        if (this->m_size >= N) throw std::bad_alloc();
        this->m_data.at(this->m_size++) = std::move(value);
    }
    /// Construct an element from args and append it
    /// @return Reference to the appended element
    /// @throws std::bad_alloc if the vector is full
    template<typename... Args>
    constexpr T& emplace_back(Args&&... args) {
        if (this->m_size >= N) throw std::bad_alloc();
        this->m_data.at(this->m_size) = T(std::forward<Args>(args)...);
        return this->m_data.at(this->m_size++);
    }
    /// Remove all elements
    constexpr void clear() { this->m_size = 0; }

    // Accessing elements

    /// Access a slot, only checked against the capacity and not the size
    constexpr T& operator[](size_t idx) { return this->m_data.at(idx); }
    constexpr const T& operator[](size_t idx) const { return this->m_data.at(idx); }
    /// Access an element
    /// @throws std::out_of_range if idx is not below size()
    [[nodiscard]] constexpr T& at(size_t idx) {
        if (idx >= this->m_size) throw std::out_of_range("Index out of range");
        return this->m_data.at(idx);
    }
    [[nodiscard]] constexpr const T& at(size_t idx) const {
        if (idx >= this->m_size) throw std::out_of_range("Index out of range");
        return this->m_data.at(idx);
    }
    /// Access the first element
    /// @throws std::out_of_range if the vector is empty
    [[nodiscard]] constexpr T& front() {
        if (this->m_size == 0) throw std::out_of_range("Vector is empty");
        return this->m_data.front();
    }
    [[nodiscard]] constexpr const T& front() const {
        if (this->m_size == 0) throw std::out_of_range("Vector is empty");
        return this->m_data.front();
    }
    /// Access the last element
    /// @throws std::out_of_range if the vector is empty
    [[nodiscard]] constexpr T& back() {
        if (this->m_size == 0) throw std::out_of_range("Vector is empty");
        return this->m_data.at(this->m_size - 1);
    }
    [[nodiscard]] constexpr const T& back() const {
        if (this->m_size == 0) throw std::out_of_range("Vector is empty");
        return this->m_data.at(this->m_size - 1);
    }

    // Iterating elements

    [[nodiscard]] constexpr T* begin() { return this->m_data.data(); }
    [[nodiscard]] constexpr const T* begin() const { return this->m_data.data(); }
    [[nodiscard]] constexpr const T* cbegin() const { return this->m_data.data(); }
    [[nodiscard]] constexpr T* end() { return this->m_data.data() + this->m_size; } // NOLINT (pointer arithmetic)
    [[nodiscard]] constexpr const T* end() const { return this->m_data.data() + this->m_size; } // NOLINT (pointer arithmetic)
    [[nodiscard]] constexpr const T* cend() const { return this->m_data.data() + this->m_size; } // NOLINT (pointer arithmetic)

    // Removing elements

    /// Remove the last element
    /// @throws std::out_of_range if the vector is empty
    constexpr void pop_back() {
        if (this->m_size == 0) throw std::out_of_range("Vector is empty");
        this->m_size--;
    }

    // Query capacity

    /// Amount of elements currently stored
    [[nodiscard]] constexpr size_t size() const { return this->m_size; }
    /// Maximum amount of elements (always N)
    [[nodiscard]] constexpr size_t capacity() const { return N; }
    /// Whether no elements are stored
    [[nodiscard]] constexpr bool empty() const { return this->m_size == 0; }
private:
    std::array<T, N> m_data{};
    size_t m_size{0};
#pragma clang diagnostic pop
};
|
||||
|
||||
/// Sequence of operations to apply to the base extent
|
||||
class ExtentOp {
|
||||
public:
|
||||
/// Default constructor for no operations and no flow scaling
|
||||
constexpr ExtentOp() = default;
|
||||
/// Constructor for no operations aside from flow scale
|
||||
constexpr ExtentOp(bool flow) : m_flow(flow) {}
|
||||
/// Constructor for a single operation
|
||||
constexpr ExtentOp(bool flow, uint32_t add, uint32_t shift)
|
||||
: m_flow(flow), m_operations({{add, shift}}) {}
|
||||
/// Constructor for a single operation starting from the flow base extent
|
||||
constexpr ExtentOp(uint32_t add, uint32_t shift)
|
||||
: m_flow(true), m_operations({{add, shift}}) {}
|
||||
// Combine two extents
|
||||
constexpr ExtentOp operator+(const ExtentOp& other) const {
|
||||
ExtentOp result{*this};
|
||||
for (const auto& [add, shift] : other.m_operations)
|
||||
result.m_operations.emplace_back(add, shift);
|
||||
return result;
|
||||
}
|
||||
// Combine two extends
|
||||
constexpr ExtentOp operator+=(const ExtentOp& other) {
|
||||
for (const auto& [add, shift] : other.m_operations)
|
||||
this->m_operations.emplace_back(add, shift);
|
||||
return *this;
|
||||
}
|
||||
/// Get the flow value
|
||||
[[nodiscard]] constexpr auto flow() const { return this->m_flow; }
|
||||
/// Get the operations
|
||||
[[nodiscard]] constexpr const auto& operations() const { return this->m_operations; }
|
||||
private:
|
||||
bool m_flow{false};
|
||||
inplace_vector<std::pair<uint32_t, uint32_t>, 8> m_operations;
|
||||
};
|
||||
|
||||
}
|
||||
95
lsfg-vk-backend/src/modules/pipeline/signature/image.hpp
Normal file
95
lsfg-vk-backend/src/modules/pipeline/signature/image.hpp
Normal file
|
|
@ -0,0 +1,95 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "helpers.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <utility>
|
||||
|
||||
namespace lsfgvk::pipeline {
|
||||
|
||||
/// Image formats understood by the pipeline, the values are those of the
/// VkFormat constants named next to each entry
enum class Format : char {
    /// Invalid format
    Invalid = 0,
    /// 8-bit unsigned normalized R format
    R8 = 9, // VK_FORMAT_R8_UNORM
    /// 8-bit unsigned normalized RGBA format
    RGBA8888 = 37, // VK_FORMAT_R8G8B8A8_UNORM
    /// 16-bit signed floating point RGBA format
    RGBA16161616 = 97, // VK_FORMAT_R16G16B16A16_SFLOAT
};
|
||||
|
||||
/// Flags that can be set on an image, every flag occupies its own bit
enum class ImageFlag : char {
    /// No special flags
    None = 0x00,
    /// Create several individual images with halving extents for each mip
    /// level, instead of using a single image array.
    ///
    /// The image then shows up as Texture2D[] rather than Texture2DArray and
    /// must therefore not be used in full with passes where the "Aggregate"
    /// flag is set.
    Mipmaps = 0x01,
    /// The image is pinned & not transient
    Pinned = 0x02,
    /// The image is written to externally
    ExternalInput = 0x04,
    /// The image is read from externally
    ExternalOutput = 0x08,
    /// A separate format should be used for HDR
    HdrVariant = 0x10
};
|
||||
|
||||
/// Helper type for operating on image flags
|
||||
class ImageFlags {
|
||||
public:
|
||||
/// Default constructor
|
||||
constexpr ImageFlags() = default;
|
||||
/// Create from single image flag
|
||||
constexpr ImageFlags(ImageFlag flag) : m_flags(static_cast<int>(flag)) {}
|
||||
/// Check any set of flags
|
||||
constexpr operator bool() const { return m_flags != 0; }
|
||||
/// Combine with another flag
|
||||
constexpr ImageFlags operator|(ImageFlag flag) const {
|
||||
return{this->m_flags | static_cast<int>(flag)};
|
||||
}
|
||||
/// Match with another flag
|
||||
constexpr ImageFlags operator&(ImageFlag flag) const {
|
||||
return{this->m_flags & static_cast<int>(flag)};
|
||||
}
|
||||
/// Match with another flag instance
|
||||
constexpr ImageFlags operator&(ImageFlags other) const {
|
||||
return{this->m_flags & other.m_flags};
|
||||
}
|
||||
private:
|
||||
int m_flags{static_cast<int>(ImageFlag::None)};
|
||||
|
||||
// Create from number
|
||||
constexpr ImageFlags(int flags) : m_flags(flags) {}
|
||||
};
|
||||
|
||||
/// Compine two image flags
|
||||
constexpr ImageFlags operator|(ImageFlag lhs, ImageFlag rhs) {
|
||||
return ImageFlags(lhs) | rhs;
|
||||
}
|
||||
|
||||
/// Signature for an image
|
||||
struct ImageSignature {
|
||||
/// Format of the image
|
||||
Format format{ Format::RGBA8888 };
|
||||
/// Optional second format for HDR variants
|
||||
Format hdrFormat{ Format::RGBA16161616 };
|
||||
/// Optional flags for the image
|
||||
ImageFlags flags{ ImageFlag::None };
|
||||
/// Operation applied to the base extent for calculating the image extent
|
||||
ExtentOp extentOp;
|
||||
/// Amount of layers in the image
|
||||
uint32_t count{1};
|
||||
|
||||
/// Lifetime of the image (set by pipeline builder)
|
||||
std::pair<size_t, size_t> lifetime;
|
||||
};
|
||||
|
||||
}
|
||||
88
lsfg-vk-backend/src/modules/pipeline/signature/pass.hpp
Normal file
88
lsfg-vk-backend/src/modules/pipeline/signature/pass.hpp
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "helpers.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
#include <string_view>
|
||||
|
||||
namespace lsfgvk::pipeline {
|
||||
|
||||
/// Flags that can be set on a pass, every flag occupies its own bit
enum class PassFlag : char {
    /// No special flags
    None = 0x00,
    /// The shader will be reused several times and resources must be
    /// aggregated into arrays and indexed via push constants.
    Aggregate = 0x01,
    /// The special flag is set via push constant.
    Special = 0x02,
    /// There are two variants for 8-bit and 16-bit formats
    HdrVariant = 0x04
};
|
||||
|
||||
/// Helper type for operating on pass flags
|
||||
class PassFlags {
|
||||
public:
|
||||
/// Default constructor
|
||||
constexpr PassFlags() = default;
|
||||
/// Create from single pass flag
|
||||
constexpr PassFlags(PassFlag flag) : m_flags(static_cast<int>(flag)) {}
|
||||
/// Check any set of flags
|
||||
constexpr operator bool() const { return m_flags != 0; }
|
||||
/// Combine with another flag
|
||||
constexpr PassFlags operator|(PassFlag flag) const {
|
||||
return{this->m_flags | static_cast<int>(flag)};
|
||||
}
|
||||
/// Match with another flag
|
||||
constexpr PassFlags operator&(PassFlag flag) const {
|
||||
return{this->m_flags & static_cast<int>(flag)};
|
||||
}
|
||||
private:
|
||||
int m_flags{static_cast<int>(PassFlag::None)};
|
||||
|
||||
// Create from number
|
||||
constexpr PassFlags(int flags) : m_flags(flags) {}
|
||||
};
|
||||
|
||||
/// Combine two pass flags
|
||||
constexpr PassFlags operator|(PassFlag lhs, PassFlag rhs) {
|
||||
return PassFlags(lhs) | rhs;
|
||||
}
|
||||
|
||||
/// Reference to an image by index, optionally narrowed to one layer of it
class Resource {
public:
    /// Default constructor, yields index 0 without a layer
    /// NOTE(review): the index is never empty with the available constructors,
    /// so `!idx()` checks on a Resource can not trigger - confirm this is intended
    constexpr Resource() = default;
    /// Refer to a full image (intentionally implicit)
    constexpr Resource(size_t idx) : m_idx{idx} {}
    /// Refer to a single layer of an image
    constexpr Resource(size_t idx, uint32_t layer) : m_idx{idx}, m_layer{layer} {}

    /// Get the image index
    [[nodiscard]] constexpr std::optional<size_t> idx() const { return this->m_idx; }
    /// Get the layer, empty when the full image is referenced
    [[nodiscard]] constexpr std::optional<uint32_t> layer() const { return this->m_layer; }
private:
    std::optional<size_t> m_idx{0};
    std::optional<uint32_t> m_layer;
};
|
||||
|
||||
/// Signature of a shader pass
|
||||
struct PassSignature {
|
||||
/// Name of the shader
|
||||
std::string_view shader;
|
||||
/// Optional flags of this pass
|
||||
PassFlags flags{ PassFlag::None };
|
||||
/// Resources to read from
|
||||
inplace_vector<Resource, 8> inputs;
|
||||
/// Resources to write to
|
||||
inplace_vector<Resource, 8> outputs;
|
||||
/// Operation applied to the base extent for calculating the dispatch extent
|
||||
ExtentOp dispatchOp;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
@ -1,73 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "alpha0.hpp"
|
||||
#include "../helpers/utils.hpp"
|
||||
#include "lsfg-vk-common/helpers/pointers.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
using namespace lsfgvk::backend;
|
||||
|
||||
Alpha0::Alpha0(const Ctx& ctx,
        const vk::Image& sourceImage) {
    // one image per group when ctx.perf is set, two otherwise
    const size_t multiplier = ctx.perf ? 1 : 2;
    const size_t outputCount = 2 * multiplier;

    const VkExtent2D halfExtent = backend::add_shift_extent(sourceImage.getExtent(), 1, 1);
    const VkExtent2D quarterExtent = backend::add_shift_extent(halfExtent, 1, 1);

    // temporary images, both groups are created alternately
    this->tempImages0.reserve(multiplier);
    this->tempImages1.reserve(multiplier);
    for (size_t n = 0; n < multiplier; ++n) {
        this->tempImages0.emplace_back(ctx.vk, halfExtent);
        this->tempImages1.emplace_back(ctx.vk, halfExtent);
    }

    // output images
    this->images.reserve(outputCount);
    for (size_t n = 0; n < outputCount; ++n)
        this->images.emplace_back(ctx.vk, quarterExtent);

    // one descriptor set per shader: source -> temp0 -> temp1 -> output
    const auto& shaders = ctx.perf ? ctx.shaders.get().performance : ctx.shaders.get().quality;
    this->sets.reserve(3);

    this->sets.emplace_back(ManagedShaderBuilder()
        .sampled(sourceImage)
        .storages(this->tempImages0)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.alpha.at(0)));

    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages0)
        .storages(this->tempImages1)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.alpha.at(1)));

    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages1)
        .storages(this->images)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.alpha.at(2)));

    // dispatch extents used by render()
    this->dispatchExtent0 = backend::add_shift_extent(halfExtent, 7, 3);
    this->dispatchExtent1 = backend::add_shift_extent(quarterExtent, 7, 3);
}
|
||||
|
||||
void Alpha0::prepare(std::vector<VkImage>& images) const {
|
||||
for (size_t i = 0; i < this->tempImages0.size(); i++) {
|
||||
images.push_back(this->tempImages0.at(i).handle());
|
||||
images.push_back(this->tempImages1.at(i).handle());
|
||||
}
|
||||
|
||||
for (const auto& image : this->images)
|
||||
images.push_back(image.handle());
|
||||
}
|
||||
|
||||
void Alpha0::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const {
    // the first two sets share an extent, the last one uses the second extent
    const auto& first = this->sets.at(0);
    first.dispatch(vk, cmd, this->dispatchExtent0);

    const auto& second = this->sets.at(1);
    second.dispatch(vk, cmd, this->dispatchExtent0);

    const auto& third = this->sets.at(2);
    third.dispatch(vk, cmd, this->dispatchExtent1);
}
|
||||
|
|
@ -1,48 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../helpers/managed_shader.hpp"
|
||||
#include "../helpers/utils.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
namespace ctx { struct Ctx; }
|
||||
|
||||
namespace lsfgvk::backend {
|
||||
/// pre-alpha shaderchain
|
||||
class Alpha0 {
|
||||
public:
|
||||
/// create a pre-alpha shaderchain
|
||||
/// @param ctx context
|
||||
/// @param sourceImage source image
|
||||
Alpha0(const Ctx& ctx,
|
||||
const vk::Image& sourceImage);
|
||||
|
||||
/// prepare the shaderchain initially
|
||||
/// @param images vector to fill with image handles
|
||||
void prepare(std::vector<VkImage>& images) const;
|
||||
|
||||
/// render the pre-alpha shaderchain
|
||||
/// @param vk the vulkan instance
|
||||
/// @param cmd command buffer
|
||||
void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const;
|
||||
|
||||
/// get the generated images
|
||||
/// @return vector of images
|
||||
[[nodiscard]] const auto& getImages() const { return this->images; }
|
||||
private:
|
||||
std::vector<vk::Image> tempImages0;
|
||||
std::vector<vk::Image> tempImages1;
|
||||
std::vector<vk::Image> images;
|
||||
|
||||
std::vector<ManagedShader> sets;
|
||||
VkExtent2D dispatchExtent0{};
|
||||
VkExtent2D dispatchExtent1{};
|
||||
};
|
||||
}
|
||||
|
|
@ -1,54 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "alpha1.hpp"
|
||||
#include "../helpers/utils.hpp"
|
||||
#include "lsfg-vk-common/helpers/pointers.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
using namespace lsfgvk::backend;
|
||||
|
||||
Alpha1::Alpha1(const Ctx& ctx, size_t temporal,
        const std::vector<vk::Image>& sourceImages) {
    // two images per group when ctx.perf is set, four otherwise
    const size_t multiplier = ctx.perf ? 1 : 2;
    const size_t imagesPerGroup = 2 * multiplier;
    const VkExtent2D quarterExtent = sourceImages.at(0).getExtent();

    // one group of output images per temporal slot
    this->images.reserve(temporal);
    for (size_t slot = 0; slot < temporal; ++slot) {
        auto& group = this->images.emplace_back();

        group.reserve(imagesPerGroup);
        for (size_t n = 0; n < imagesPerGroup; ++n)
            group.emplace_back(ctx.vk, quarterExtent);
    }

    // one descriptor set per temporal slot, all reading the same source images
    const auto& shaders = ctx.perf ? ctx.shaders.get().performance : ctx.shaders.get().quality;
    this->sets.reserve(temporal);
    for (size_t slot = 0; slot < temporal; ++slot) {
        this->sets.emplace_back(ManagedShaderBuilder()
            .sampleds(sourceImages)
            .storages(this->images.at(slot))
            .sampler(ctx.bnbSampler)
            .build(ctx.vk, ctx.pool, shaders.alpha.at(3)));
    }

    // dispatch extent used by render()
    this->dispatchExtent = backend::add_shift_extent(quarterExtent, 7, 3);
}
|
||||
|
||||
void Alpha1::prepare(std::vector<VkImage>& images) const {
|
||||
for (const auto& vec : this->images)
|
||||
for (const auto& img : vec)
|
||||
images.push_back(img.handle());
|
||||
}
|
||||
|
||||
void Alpha1::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const {
    // the frame index wraps around the available descriptor sets
    const size_t slot = idx % this->sets.size();
    this->sets.at(slot).dispatch(vk, cmd, this->dispatchExtent);
}
|
||||
|
|
@ -1,47 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../helpers/managed_shader.hpp"
|
||||
#include "../helpers/utils.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
namespace ctx { struct Ctx; }
|
||||
|
||||
namespace lsfgvk::backend {
|
||||
/// alpha shaderchain
|
||||
class Alpha1 {
|
||||
public:
|
||||
/// create a alpha shaderchain
|
||||
/// @param ctx context
|
||||
/// @param temporal temporal count
|
||||
/// @param sourceImages source images
|
||||
Alpha1(const Ctx& ctx, size_t temporal,
|
||||
const std::vector<vk::Image>& sourceImages);
|
||||
|
||||
/// prepare the shaderchain initially
|
||||
/// @param images vector to fill with image handles
|
||||
void prepare(std::vector<VkImage>& images) const;
|
||||
|
||||
/// render the alpha shaderchain
|
||||
/// @param vk the vulkan instance
|
||||
/// @param cmd command buffer
|
||||
/// @param idx frame index
|
||||
void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const;
|
||||
|
||||
/// get the generated images
|
||||
/// @return vector of images
|
||||
[[nodiscard]] const auto& getImages() const { return this->images; }
|
||||
private:
|
||||
std::vector<std::vector<vk::Image>> images;
|
||||
|
||||
std::vector<ManagedShader> sets;
|
||||
VkExtent2D dispatchExtent{};
|
||||
};
|
||||
}
|
||||
|
|
@ -1,50 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "beta0.hpp"
|
||||
#include "../helpers/utils.hpp"
|
||||
#include "lsfg-vk-common/helpers/pointers.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
using namespace lsfgvk::backend;
|
||||
|
||||
/// Create the beta0 shaderchain: two output images and one descriptor set
/// per entry of sourceImages.
/// @param ctx context
/// @param sourceImages source images (must contain at least one non-empty entry)
Beta0::Beta0(const Ctx& ctx,
        const std::vector<std::vector<vk::Image>>& sourceImages) {
    const size_t count = sourceImages.size();
    const VkExtent2D extent = sourceImages.at(0).at(0).getExtent();

    // output images share the extent of the first source image
    constexpr size_t kOutputs = 2;
    this->images.reserve(kOutputs);
    for (size_t n = 0; n < kOutputs; ++n)
        this->images.emplace_back(ctx.vk, extent);

    // shader variant depends on the performance flag
    const auto& preset = ctx.perf ?
        ctx.shaders.get().performance : ctx.shaders.get().quality;
    const auto& shader = preset.beta.at(0);

    // each set samples the two preceding entries (wrapping around) and
    // then the entry at its own position
    this->sets.reserve(count);
    for (size_t slot = 0; slot < count; ++slot) {
        const size_t twoBack = (slot + (count - 2)) % count;
        const size_t oneBack = (slot + (count - 1)) % count;
        const size_t current = slot % count;

        this->sets.emplace_back(ManagedShaderBuilder()
            .sampleds(sourceImages.at(twoBack))
            .sampleds(sourceImages.at(oneBack))
            .sampleds(sourceImages.at(current))
            .storages(this->images)
            .sampler(ctx.bnwSampler)
            .build(ctx.vk, ctx.pool, shader));
    }

    // dispatch extent is derived from the image extent
    this->dispatchExtent = backend::add_shift_extent(extent, 7, 3);
}
|
||||
|
||||
void Beta0::prepare(std::vector<VkImage>& images) const {
|
||||
for (const auto& img : this->images)
|
||||
images.push_back(img.handle());
|
||||
}
|
||||
|
||||
/// Dispatch the descriptor set selected by the frame index.
/// @param vk vulkan instance
/// @param cmd command buffer
/// @param idx frame index, wrapped to the number of descriptor sets
void Beta0::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const {
    const size_t slot = idx % this->sets.size();
    const auto& set = this->sets.at(slot);
    set.dispatch(vk, cmd, this->dispatchExtent);
}
|
||||
|
|
@ -1,46 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../helpers/managed_shader.hpp"
|
||||
#include "../helpers/utils.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
namespace ctx { struct Ctx; }
|
||||
|
||||
namespace lsfgvk::backend {
|
||||
/// beta shaderchain
|
||||
class Beta0 {
|
||||
public:
|
||||
/// create a beta shaderchain
|
||||
/// @param ctx context
|
||||
/// @param sourceImages source images
|
||||
Beta0(const Ctx& ctx,
|
||||
const std::vector<std::vector<vk::Image>>& sourceImages);
|
||||
|
||||
/// prepare the shaderchain initially
|
||||
/// @param images vector to fill with image handles
|
||||
void prepare(std::vector<VkImage>& images) const;
|
||||
|
||||
/// render the beta shaderchain
|
||||
/// @param vk vulkan instance
|
||||
/// @param cmd command buffer
|
||||
/// @param idx frame index
|
||||
void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const;
|
||||
|
||||
/// get the generated images
|
||||
/// @return vector of images
|
||||
[[nodiscard]] const auto& getImages() const { return this->images; }
|
||||
private:
|
||||
std::vector<vk::Image> images;
|
||||
|
||||
std::vector<ManagedShader> sets;
|
||||
VkExtent2D dispatchExtent{};
|
||||
};
|
||||
}
|
||||
|
|
@ -1,81 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "beta1.hpp"
|
||||
#include "../helpers/utils.hpp"
|
||||
#include "lsfg-vk-common/helpers/pointers.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
using namespace lsfgvk::backend;
|
||||
|
||||
/// Create the beta1 shaderchain: two pairs of temporary images, six output
/// images and four descriptor sets that are dispatched in order by render().
/// @param ctx context
/// @param sourceImages source images (must not be empty)
Beta1::Beta1(const Ctx& ctx,
        const std::vector<vk::Image>& sourceImages) {
    const VkExtent2D extent = sourceImages.at(0).getExtent();

    // temporaries: the passes below alternate between reading one vector
    // and writing the other
    constexpr uint32_t kTemps = 2;
    this->tempImages0.reserve(kTemps);
    this->tempImages1.reserve(kTemps);
    for (uint32_t n = 0; n < kTemps; ++n) {
        this->tempImages0.emplace_back(ctx.vk, extent);
        this->tempImages1.emplace_back(ctx.vk, extent);
    }

    // outputs: one R8 image per level, extent shifted by the level index
    constexpr uint32_t kLevels = 6;
    this->images.reserve(kLevels);
    for (uint32_t level = 0; level < kLevels; ++level)
        this->images.emplace_back(ctx.vk,
            backend::shift_extent(extent, level),
            VK_FORMAT_R8_UNORM);

    // shader variant depends on the performance flag
    const auto& preset = ctx.perf ?
        ctx.shaders.get().performance : ctx.shaders.get().quality;
    const auto& shaders = preset.beta;

    // builds a pass that samples `inputs` and writes `outputs`
    const auto makePass = [&](auto& inputs, auto& outputs, size_t shaderIdx) {
        return ManagedShaderBuilder()
            .sampleds(inputs)
            .storages(outputs)
            .sampler(ctx.bnbSampler)
            .build(ctx.vk, ctx.pool, shaders.at(shaderIdx));
    };

    this->sets.reserve(4);
    this->sets.emplace_back(makePass(sourceImages, this->tempImages0, 1));
    this->sets.emplace_back(makePass(this->tempImages0, this->tempImages1, 2));
    this->sets.emplace_back(makePass(this->tempImages1, this->tempImages0, 3));
    // final pass additionally binds the constant buffer and writes the outputs
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages0)
        .storages(this->images)
        .sampler(ctx.bnbSampler)
        .buffer(ctx.constantBuffer)
        .build(ctx.vk, ctx.pool, shaders.at(4)));

    // the first three passes use dispatchExtent0, the last one dispatchExtent1
    this->dispatchExtent0 = backend::add_shift_extent(extent, 7, 3);
    this->dispatchExtent1 = backend::add_shift_extent(extent, 31, 5);
}
|
||||
|
||||
void Beta1::prepare(std::vector<VkImage>& images) const {
|
||||
for (size_t i = 0; i < 2; i++) {
|
||||
images.push_back(this->tempImages0.at(i).handle());
|
||||
images.push_back(this->tempImages1.at(i).handle());
|
||||
}
|
||||
for (const auto& img : this->images)
|
||||
images.push_back(img.handle());
|
||||
}
|
||||
|
||||
/// Dispatch the four passes in order: the first three with dispatchExtent0,
/// the final one with dispatchExtent1.
/// @param vk the vulkan instance
/// @param cmd command buffer
void Beta1::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const {
    for (size_t pass = 0; pass < 3; ++pass)
        this->sets.at(pass).dispatch(vk, cmd, this->dispatchExtent0);

    this->sets.at(3).dispatch(vk, cmd, this->dispatchExtent1);
}
|
||||
|
|
@ -1,48 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../helpers/managed_shader.hpp"
|
||||
#include "../helpers/utils.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
namespace ctx { struct Ctx; }
|
||||
|
||||
namespace lsfgvk::backend {
|
||||
/// beta shaderchain
|
||||
class Beta1 {
|
||||
public:
|
||||
/// create a beta shaderchain
|
||||
/// @param ctx context
|
||||
/// @param sourceImages source images
|
||||
Beta1(const Ctx& ctx,
|
||||
const std::vector<vk::Image>& sourceImages);
|
||||
|
||||
/// prepare the shaderchain initially
|
||||
/// @param images vector to fill with image handles
|
||||
void prepare(std::vector<VkImage>& images) const;
|
||||
|
||||
/// render the beta shaderchain
|
||||
/// @param vk the vulkan instance
|
||||
/// @param cmd command buffer
|
||||
void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const;
|
||||
|
||||
/// get the generated images
|
||||
/// @return vector of images
|
||||
[[nodiscard]] const auto& getImages() const { return this->images; }
|
||||
private:
|
||||
std::vector<vk::Image> tempImages0;
|
||||
std::vector<vk::Image> tempImages1;
|
||||
std::vector<vk::Image> images;
|
||||
|
||||
std::vector<ManagedShader> sets;
|
||||
VkExtent2D dispatchExtent0{};
|
||||
VkExtent2D dispatchExtent1{};
|
||||
};
|
||||
}
|
||||
|
|
@ -1,75 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "delta0.hpp"
|
||||
#include "../helpers/utils.hpp"
|
||||
#include "lsfg-vk-common/helpers/pointers.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
using namespace lsfgvk::backend;
|
||||
|
||||
/// Create the delta0 shaderchain: two groups of output images and, for each
/// group, one descriptor set per entry of sourceImages.
/// @param ctx context
/// @param idx generated frame index (selects the constant buffer)
/// @param sourceImages source images (must contain at least one non-empty entry)
/// @param additionalInput0 additional input image
/// @param additionalInput1 additional input image
Delta0::Delta0(const Ctx& ctx, size_t idx,
        const std::vector<std::vector<vk::Image>>& sourceImages,
        const vk::Image& additionalInput0,
        const vk::Image& additionalInput1) {
    const size_t m = ctx.perf ? 1 : 2; // multiplier
    const size_t count = sourceImages.size();
    const VkExtent2D extent = sourceImages.at(0).at(0).getExtent();

    // first output group: always three images
    constexpr size_t kOutputs0 = 3;
    this->images0.reserve(kOutputs0);
    for (size_t n = 0; n < kOutputs0; ++n)
        this->images0.emplace_back(ctx.vk, extent);

    // second output group: size follows the multiplier
    this->images1.reserve(m);
    for (size_t n = 0; n < m; ++n)
        this->images1.emplace_back(ctx.vk, extent);

    // shader variant depends on the performance flag
    const auto& preset = ctx.perf ?
        ctx.shaders.get().performance : ctx.shaders.get().quality;
    const auto& shaders = preset.delta;

    // sets writing images0: previous entry (wrapping), current entry, input 0
    this->sets0.reserve(count);
    for (size_t slot = 0; slot < count; ++slot) {
        const size_t previous = (slot + (count - 1)) % count;
        const size_t current = slot % count;

        this->sets0.emplace_back(ManagedShaderBuilder()
            .sampleds(sourceImages.at(previous))
            .sampleds(sourceImages.at(current))
            .sampled(additionalInput0)
            .storages(this->images0)
            .sampler(ctx.bnwSampler)
            .sampler(ctx.eabSampler)
            .buffer(ctx.constantBuffers.at(idx))
            .build(ctx.vk, ctx.pool, shaders.at(0)));
    }

    // sets writing images1: same sources, then input 1 followed by input 0
    this->sets1.reserve(count);
    for (size_t slot = 0; slot < count; ++slot) {
        const size_t previous = (slot + (count - 1)) % count;
        const size_t current = slot % count;

        this->sets1.emplace_back(ManagedShaderBuilder()
            .sampleds(sourceImages.at(previous))
            .sampleds(sourceImages.at(current))
            .sampled(additionalInput1)
            .sampled(additionalInput0)
            .storages(this->images1)
            .sampler(ctx.bnwSampler)
            .sampler(ctx.eabSampler)
            .buffer(ctx.constantBuffers.at(idx))
            .build(ctx.vk, ctx.pool, shaders.at(5)));
    }

    // dispatch extent is derived from the image extent
    this->dispatchExtent = backend::add_shift_extent(extent, 7, 3);
}
|
||||
|
||||
void Delta0::prepare(std::vector<VkImage>& images) const {
|
||||
for (const auto& img : this->images0)
|
||||
images.push_back(img.handle());
|
||||
for (const auto& img : this->images1)
|
||||
images.push_back(img.handle());
|
||||
}
|
||||
|
||||
/// Dispatch the frame's descriptor set from sets0, then the one from sets1.
/// @param vk the vulkan instance
/// @param cmd command buffer
/// @param idx frame index, wrapped to the number of descriptor sets
void Delta0::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const {
    const auto& first = this->sets0.at(idx % this->sets0.size());
    first.dispatch(vk, cmd, this->dispatchExtent);

    const auto& second = this->sets1.at(idx % this->sets1.size());
    second.dispatch(vk, cmd, this->dispatchExtent);
}
|
||||
|
|
@ -1,57 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../helpers/managed_shader.hpp"
|
||||
#include "../helpers/utils.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
namespace ctx { struct Ctx; }
|
||||
|
||||
namespace lsfgvk::backend {
|
||||
/// delta shaderchain
|
||||
class Delta0 {
|
||||
public:
|
||||
/// create a delta shaderchain
|
||||
/// @param ctx context
|
||||
/// @param idx generated frame index
|
||||
/// @param sourceImages source images
|
||||
/// @param additionalInput0 additional input image
|
||||
/// @param additionalInput1 additional input image
|
||||
Delta0(const Ctx& ctx, size_t idx,
|
||||
const std::vector<std::vector<vk::Image>>& sourceImages,
|
||||
const vk::Image& additionalInput0,
|
||||
const vk::Image& additionalInput1);
|
||||
|
||||
/// prepare the shaderchain initially
|
||||
/// @param images vector to fill with image handles
|
||||
void prepare(std::vector<VkImage>& images) const;
|
||||
|
||||
/// render the delta shaderchain
|
||||
/// @param vk the vulkan instance
|
||||
/// @param cmd command buffer
|
||||
/// @param idx frame index
|
||||
void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const;
|
||||
|
||||
/// get the generated images
|
||||
/// @return vector of images
|
||||
[[nodiscard]] const auto& getImages0() const { return this->images0; }
|
||||
|
||||
/// get the other generated images
|
||||
/// @return vector of images
|
||||
[[nodiscard]] const auto& getImages1() const { return this->images1; }
|
||||
private:
|
||||
std::vector<vk::Image> images0;
|
||||
std::vector<vk::Image> images1;
|
||||
|
||||
std::vector<ManagedShader> sets0;
|
||||
std::vector<ManagedShader> sets1;
|
||||
VkExtent2D dispatchExtent{};
|
||||
};
|
||||
}
|
||||
|
|
@ -1,110 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "delta1.hpp"
|
||||
#include "../helpers/utils.hpp"
|
||||
#include "lsfg-vk-common/helpers/pointers.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
using namespace lsfgvk::backend;
|
||||
|
||||
/// Create the delta1 shaderchain.
///
/// Two chains of four passes each are built. The first chain starts from
/// sourceImages0 and writes image0; the second starts from sourceImages1 and
/// writes image1. Both chains share the same temporary images, and render()
/// dispatches all eight sets in the order they are created here.
///
/// @param ctx context
/// @param idx generated frame index (selects the constant buffer)
/// @param sourceImages0 source images of the first chain (must not be empty)
/// @param sourceImages1 source images of the second chain
/// @param additionalInput0 additional input image (first chain, final pass)
/// @param additionalInput1 additional input image (first chain, final pass)
/// @param additionalInput2 additional input image (second chain, final pass)
Delta1::Delta1(const Ctx& ctx, size_t idx,
        const std::vector<vk::Image>& sourceImages0,
        const std::vector<vk::Image>& sourceImages1,
        const vk::Image& additionalInput0,
        const vk::Image& additionalInput1,
        const vk::Image& additionalInput2) {
    const size_t m = ctx.perf ? 1 : 2; // multiplier
    const VkExtent2D extent = sourceImages0.at(0).getExtent();

    // create temporary & output images; the final count is known up front,
    // so reserve once instead of reallocating while emplacing
    const size_t tempCount = 2 * m;
    this->tempImages0.reserve(tempCount);
    this->tempImages1.reserve(tempCount);
    for (size_t i = 0; i < tempCount; i++) {
        this->tempImages0.emplace_back(ctx.vk, extent);
        this->tempImages1.emplace_back(ctx.vk, extent);
    }
    this->image0.emplace(ctx.vk, extent, VK_FORMAT_R16G16B16A16_SFLOAT);
    this->image1.emplace(ctx.vk, extent, VK_FORMAT_R16G16B16A16_SFLOAT);

    // create descriptor sets
    const auto& shaders = (ctx.perf ?
        ctx.shaders.get().performance : ctx.shaders.get().quality).delta;
    this->sets.reserve(4 + 4);

    // first chain: uses all temporaries, final pass writes image0
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(sourceImages0)
        .storages(this->tempImages0)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.at(1)));
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages0)
        .storages(this->tempImages1)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.at(2)));
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages1)
        .storages(this->tempImages0)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.at(3)));
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages0)
        .sampled(additionalInput0)
        .sampled(additionalInput1)
        .storage(*this->image0)
        .sampler(ctx.bnbSampler)
        .sampler(ctx.eabSampler)
        .buffer(ctx.constantBuffers.at(idx))
        .build(ctx.vk, ctx.pool, shaders.at(4)));

    // second chain: temporaries are bound with the (0, m) range arguments,
    // final pass writes image1
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(sourceImages1)
        .storages(this->tempImages0, 0, m)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.at(6)));
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages0, 0, m)
        .storages(this->tempImages1, 0, m)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.at(7)));
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages1, 0, m)
        .storages(this->tempImages0, 0, m)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.at(8)));
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages0, 0, m)
        .sampled(additionalInput2)
        .storage(*this->image1)
        .sampler(ctx.bnbSampler)
        .sampler(ctx.eabSampler)
        .buffer(ctx.constantBuffers.at(idx))
        .build(ctx.vk, ctx.pool, shaders.at(9)));

    // store dispatch extents
    this->dispatchExtent = backend::add_shift_extent(extent, 7, 3);
}
|
||||
|
||||
/// Append the handles of all temporary images (interleaved pairwise) and
/// then of both output images to the given vector.
/// @param images vector to fill with image handles
void Delta1::prepare(std::vector<VkImage>& images) const {
    const size_t pairs = this->tempImages0.size();
    size_t n = 0;
    while (n < pairs) {
        images.push_back(this->tempImages0.at(n).handle());
        images.push_back(this->tempImages1.at(n).handle());
        ++n;
    }

    images.push_back(this->image0->handle());
    images.push_back(this->image1->handle());
}
|
||||
|
||||
/// Dispatch every descriptor set in creation order with the shared extent.
/// @param vk the vulkan instance
/// @param cmd command buffer
void Delta1::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const {
    for (size_t pass = 0; pass < this->sets.size(); ++pass)
        this->sets[pass].dispatch(vk, cmd, this->dispatchExtent);
}
|
||||
|
|
@ -1,62 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../helpers/managed_shader.hpp"
|
||||
#include "../helpers/utils.hpp"
|
||||
#include "lsfg-vk-common/helpers/pointers.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
namespace ctx { struct Ctx; }
|
||||
|
||||
namespace lsfgvk::backend {
|
||||
/// gamma shaderchain
|
||||
class Delta1 {
|
||||
public:
|
||||
/// create a gamma shaderchain
|
||||
/// @param ctx context
|
||||
/// @param idx generated frame index
|
||||
/// @param sourceImages0 source images
|
||||
/// @param sourceImages1 source images
|
||||
/// @param additionalInput0 additional input image
|
||||
/// @param additionalInput1 additional input image
|
||||
/// @param additionalInput2 additional input image
|
||||
Delta1(const Ctx& ctx, size_t idx,
|
||||
const std::vector<vk::Image>& sourceImages0,
|
||||
const std::vector<vk::Image>& sourceImages1,
|
||||
const vk::Image& additionalInput0,
|
||||
const vk::Image& additionalInput1,
|
||||
const vk::Image& additionalInput2);
|
||||
|
||||
/// prepare the shaderchain initially
|
||||
/// @param images vector to fill with image handles
|
||||
void prepare(std::vector<VkImage>& images) const;
|
||||
|
||||
/// render the gamma shaderchain
|
||||
/// @param vk the vulkan instance
|
||||
/// @param cmd command buffer
|
||||
void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const;
|
||||
|
||||
/// get the first generated image
|
||||
/// @return image
|
||||
[[nodiscard]] const auto& getImage0() const { return *this->image0; }
|
||||
|
||||
/// get the second generated image
|
||||
/// @return image
|
||||
[[nodiscard]] const auto& getImage1() const { return *this->image1; }
|
||||
private:
|
||||
std::vector<vk::Image> tempImages0;
|
||||
std::vector<vk::Image> tempImages1;
|
||||
ls::lazy<vk::Image> image0;
|
||||
ls::lazy<vk::Image> image1;
|
||||
|
||||
std::vector<ManagedShader> sets;
|
||||
VkExtent2D dispatchExtent{};
|
||||
};
|
||||
}
|
||||
|
|
@ -1,53 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "gamma0.hpp"
|
||||
#include "../helpers/utils.hpp"
|
||||
#include "lsfg-vk-common/helpers/pointers.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
using namespace lsfgvk::backend;
|
||||
|
||||
/// Create the gamma0 shaderchain: three output images and one descriptor
/// set per entry of sourceImages.
/// @param ctx context
/// @param idx generated frame index (selects the constant buffer)
/// @param sourceImages source images (must contain at least one non-empty entry)
/// @param additionalInput additional input image
Gamma0::Gamma0(const Ctx& ctx, size_t idx,
        const std::vector<std::vector<vk::Image>>& sourceImages,
        const vk::Image& additionalInput) {
    const size_t count = sourceImages.size();
    const VkExtent2D extent = sourceImages.at(0).at(0).getExtent();

    // output images share the extent of the first source image
    constexpr size_t kOutputs = 3;
    this->images.reserve(kOutputs);
    for (size_t n = 0; n < kOutputs; ++n)
        this->images.emplace_back(ctx.vk, extent);

    // shader variant depends on the performance flag
    const auto& preset = ctx.perf ?
        ctx.shaders.get().performance : ctx.shaders.get().quality;
    const auto& shader = preset.gamma.at(0);

    // each set samples the previous entry (wrapping around), the current
    // entry and the additional input
    this->sets.reserve(count);
    for (size_t slot = 0; slot < count; ++slot) {
        const size_t previous = (slot + (count - 1)) % count;
        const size_t current = slot % count;

        this->sets.emplace_back(ManagedShaderBuilder()
            .sampleds(sourceImages.at(previous))
            .sampleds(sourceImages.at(current))
            .sampled(additionalInput)
            .storages(this->images)
            .sampler(ctx.bnwSampler)
            .sampler(ctx.eabSampler)
            .buffer(ctx.constantBuffers.at(idx))
            .build(ctx.vk, ctx.pool, shader));
    }

    // dispatch extent is derived from the image extent
    this->dispatchExtent = backend::add_shift_extent(extent, 7, 3);
}
|
||||
|
||||
void Gamma0::prepare(std::vector<VkImage>& images) const {
|
||||
for (const auto& img : this->images)
|
||||
images.push_back(img.handle());
|
||||
}
|
||||
|
||||
/// Dispatch the descriptor set selected by the frame index.
/// @param vk the vulkan instance
/// @param cmd command buffer
/// @param idx frame index, wrapped to the number of descriptor sets
void Gamma0::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const {
    const size_t slot = idx % this->sets.size();
    const auto& set = this->sets.at(slot);
    set.dispatch(vk, cmd, this->dispatchExtent);
}
|
||||
|
|
@ -1,49 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../helpers/managed_shader.hpp"
|
||||
#include "../helpers/utils.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
namespace ctx { struct Ctx; }
|
||||
|
||||
namespace lsfgvk::backend {
|
||||
/// gamma shaderchain
|
||||
class Gamma0 {
|
||||
public:
|
||||
/// create a gamma shaderchain
|
||||
/// @param ctx context
|
||||
/// @param idx generated frame index
|
||||
/// @param sourceImages source images
|
||||
/// @param additionalInput additional input image
|
||||
Gamma0(const Ctx& ctx, size_t idx,
|
||||
const std::vector<std::vector<vk::Image>>& sourceImages,
|
||||
const vk::Image& additionalInput);
|
||||
|
||||
/// prepare the shaderchain initially
|
||||
/// @param images vector to fill with image handles
|
||||
void prepare(std::vector<VkImage>& images) const;
|
||||
|
||||
/// render the gamma shaderchain
|
||||
/// @param vk the vulkan instance
|
||||
/// @param cmd command buffer
|
||||
/// @param idx frame index
|
||||
void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const;
|
||||
|
||||
/// get the generated images
|
||||
/// @return vector of images
|
||||
[[nodiscard]] const auto& getImages() const { return this->images; }
|
||||
private:
|
||||
std::vector<vk::Image> images;
|
||||
|
||||
std::vector<ManagedShader> sets;
|
||||
VkExtent2D dispatchExtent{};
|
||||
};
|
||||
}
|
||||
|
|
@ -1,78 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "gamma1.hpp"
|
||||
#include "../helpers/utils.hpp"
|
||||
#include "lsfg-vk-common/helpers/pointers.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
using namespace lsfgvk::backend;
|
||||
|
||||
/// Create the gamma1 shaderchain.
///
/// Four passes are built: three that alternate between the two temporary
/// image vectors, and a final one that combines the temporaries with both
/// additional inputs and writes the output image. render() dispatches them
/// in the order they are created here.
///
/// @param ctx context
/// @param idx generated frame index (selects the constant buffer)
/// @param sourceImages source images (must not be empty)
/// @param additionalInput0 additional input image (final pass)
/// @param additionalInput1 additional input image (final pass)
Gamma1::Gamma1(const Ctx& ctx, size_t idx,
        const std::vector<vk::Image>& sourceImages,
        const vk::Image& additionalInput0,
        const vk::Image& additionalInput1) {
    const size_t m = ctx.perf ? 1 : 2; // multiplier
    const VkExtent2D extent = sourceImages.at(0).getExtent();

    // create temporary & output images; the final count is known up front,
    // so reserve once instead of reallocating while emplacing
    const size_t tempCount = 2 * m;
    this->tempImages0.reserve(tempCount);
    this->tempImages1.reserve(tempCount);
    for (size_t i = 0; i < tempCount; i++) {
        this->tempImages0.emplace_back(ctx.vk, extent);
        this->tempImages1.emplace_back(ctx.vk, extent);
    }
    this->image.emplace(ctx.vk, extent, VK_FORMAT_R16G16B16A16_SFLOAT);

    // create descriptor sets
    const auto& shaders = (ctx.perf ?
        ctx.shaders.get().performance : ctx.shaders.get().quality).gamma;
    this->sets.reserve(4);
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(sourceImages)
        .storages(this->tempImages0)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.at(1)));
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages0)
        .storages(this->tempImages1)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.at(2)));
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages1)
        .storages(this->tempImages0)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.at(3)));
    // final pass: writes the single output image
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages0)
        .sampled(additionalInput0)
        .sampled(additionalInput1)
        .storage(*this->image)
        .sampler(ctx.bnbSampler)
        .sampler(ctx.eabSampler)
        .buffer(ctx.constantBuffers.at(idx))
        .build(ctx.vk, ctx.pool, shaders.at(4)));

    // store dispatch extents
    this->dispatchExtent = backend::add_shift_extent(extent, 7, 3);
}
|
||||
|
||||
/// Append the handles of all temporary images (interleaved pairwise) and
/// then of the output image to the given vector.
/// @param images vector to fill with image handles
void Gamma1::prepare(std::vector<VkImage>& images) const {
    const size_t pairs = this->tempImages0.size();
    size_t n = 0;
    while (n < pairs) {
        images.push_back(this->tempImages0.at(n).handle());
        images.push_back(this->tempImages1.at(n).handle());
        ++n;
    }

    images.push_back(this->image->handle());
}
|
||||
|
||||
/// Dispatch every descriptor set in creation order with the shared extent.
/// @param vk the vulkan instance
/// @param cmd command buffer
void Gamma1::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const {
    for (size_t pass = 0; pass < this->sets.size(); ++pass)
        this->sets[pass].dispatch(vk, cmd, this->dispatchExtent);
}
|
||||
|
|
@ -1,53 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../helpers/managed_shader.hpp"
|
||||
#include "../helpers/utils.hpp"
|
||||
#include "lsfg-vk-common/helpers/pointers.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
namespace ctx { struct Ctx; }
|
||||
|
||||
namespace lsfgvk::backend {
|
||||
/// gamma shaderchain
|
||||
class Gamma1 {
|
||||
public:
|
||||
/// create a gamma shaderchain
|
||||
/// @param ctx context
|
||||
/// @param idx generated frame index
|
||||
/// @param sourceImages source images
|
||||
/// @param additionalInput0 additional input image
|
||||
/// @param additionalInput1 additional input image
|
||||
Gamma1(const Ctx& ctx, size_t idx,
|
||||
const std::vector<vk::Image>& sourceImages,
|
||||
const vk::Image& additionalInput0,
|
||||
const vk::Image& additionalInput1);
|
||||
|
||||
/// prepare the shaderchain initially
|
||||
/// @param images vector to fill with image handles
|
||||
void prepare(std::vector<VkImage>& images) const;
|
||||
|
||||
/// render the gamma shaderchain
|
||||
/// @param vk the vulkan instance
|
||||
/// @param cmd command buffer
|
||||
void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const;
|
||||
|
||||
/// get the generated image
|
||||
/// @return image
|
||||
[[nodiscard]] const auto& getImage() const { return *this->image; }
|
||||
private:
|
||||
std::vector<vk::Image> tempImages0;
|
||||
std::vector<vk::Image> tempImages1;
|
||||
ls::lazy<vk::Image> image;
|
||||
|
||||
std::vector<ManagedShader> sets;
|
||||
VkExtent2D dispatchExtent{};
|
||||
};
|
||||
}
|
||||
|
|
@ -1,57 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "generate.hpp"
|
||||
#include "../helpers/utils.hpp"
|
||||
#include "lsfg-vk-common/helpers/pointers.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
using namespace lsfgvk::backend;
|
||||
|
||||
/// Create the generate shaderchain: two descriptor sets that differ only in
/// the order in which the two source images are bound.
/// @param ctx context
/// @param idx generated frame index (selects the constant buffer)
/// @param sourceImages pair of source images
/// @param inputImage1 input image 1
/// @param inputImage2 input image 2
/// @param inputImage3 input image 3
/// @param outputImage image bound as storage target of both sets
Generate::Generate(const Ctx& ctx, size_t idx,
        const std::pair<vk::Image, vk::Image>& sourceImages,
        const vk::Image& inputImage1,
        const vk::Image& inputImage2,
        const vk::Image& inputImage3,
        const vk::Image& outputImage) {
    // shader variant depends on the hdr flag
    const auto& shader = ctx.hdr ?
        ctx.shaders.get().generate_hdr : ctx.shaders.get().generate;

    // builds one set with the given source image binding order
    const auto makeSet = [&](const vk::Image& leading, const vk::Image& trailing) {
        return ManagedShaderBuilder()
            .sampled(leading)
            .sampled(trailing)
            .sampled(inputImage1)
            .sampled(inputImage2)
            .sampled(inputImage3)
            .storage(outputImage)
            .sampler(ctx.bnbSampler)
            .sampler(ctx.eabSampler)
            .buffer(ctx.constantBuffers.at(idx))
            .build(ctx.vk, ctx.pool, shader);
    };

    // set 0 binds (second, first); set 1 binds (first, second)
    this->sets.reserve(2);
    this->sets.emplace_back(makeSet(sourceImages.second, sourceImages.first));
    this->sets.emplace_back(makeSet(sourceImages.first, sourceImages.second));

    // dispatch extent is derived from the context's source extent
    this->dispatchExtent = backend::add_shift_extent(ctx.sourceExtent, 15, 4);
}
|
||||
|
||||
/// Dispatch one of the two descriptor sets, chosen by the parity of idx.
/// @param vk the vulkan instance
/// @param cmd command buffer
/// @param idx frame index
void Generate::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const {
    const size_t slot = idx % 2;
    const auto& set = this->sets.at(slot);
    set.dispatch(vk, cmd, this->dispatchExtent);
}
|
||||
|
|
@ -1,45 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../helpers/managed_shader.hpp"
|
||||
#include "../helpers/utils.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
namespace ctx { struct Ctx; }
|
||||
|
||||
namespace lsfgvk::backend {
|
||||
/// generate shaderchain
|
||||
class Generate {
|
||||
public:
|
||||
/// create a generate shaderchain
|
||||
/// @param ctx context
|
||||
/// @param idx generated frame index
|
||||
/// @param sourceImages pair of source images
|
||||
/// @param inputImage1 input image 1
|
||||
/// @param inputImage2 input image 2
|
||||
/// @param inputImage3 input image 3
|
||||
Generate(const Ctx& ctx, size_t idx,
|
||||
const std::pair<vk::Image, vk::Image>& sourceImages,
|
||||
const vk::Image& inputImage1,
|
||||
const vk::Image& inputImage2,
|
||||
const vk::Image& inputImage3,
|
||||
const vk::Image& outputImage);
|
||||
|
||||
/// render the generate shaderchain
|
||||
/// @param vk the vulkan instance
|
||||
/// @param cmd command buffer
|
||||
/// @param idx frame index
|
||||
void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const;
|
||||
private:
|
||||
std::vector<ManagedShader> sets;
|
||||
VkExtent2D dispatchExtent{};
|
||||
};
|
||||
}
|
||||
|
|
@ -1,53 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "mipmaps.hpp"
|
||||
#include "../helpers/utils.hpp"
|
||||
#include "lsfg-vk-common/helpers/pointers.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
using namespace lsfgvk::backend;
|
||||
|
||||
/// Set up the mipmaps shaderchain: seven R8 output images plus one managed
/// shader per source image, all writing to the same outputs.
Mipmaps::Mipmaps(const Ctx& ctx,
        const std::pair<vk::Image, vk::Image>& sourceImages) {
    // output images: level 0 is the base, levels 1-6 are the mips
    constexpr uint32_t LEVELS = 7;
    this->images.reserve(LEVELS);
    for (uint32_t level = 0; level < LEVELS; ++level) {
        this->images.emplace_back(ctx.vk,
            backend::shift_extent(ctx.flowExtent, level), VK_FORMAT_R8_UNORM);
    }

    // one descriptor set per source image, first image first
    this->sets.reserve(2);
    for (const vk::Image* source : { &sourceImages.first, &sourceImages.second }) {
        this->sets.emplace_back(ManagedShaderBuilder()
            .sampled(*source)
            .storages(this->images)
            .sampler(ctx.bnbSampler)
            .buffer(ctx.constantBuffer)
            .build(ctx.vk, ctx.pool, ctx.shaders.get().mipmaps));
    }

    // remember the extent used for every dispatch
    this->dispatchExtent = backend::add_shift_extent(ctx.flowExtent, 63, 6);
}
|
||||
|
||||
void Mipmaps::prepare(std::vector<VkImage>& images) const {
|
||||
for (const auto& img : this->images)
|
||||
images.push_back(img.handle());
|
||||
}
|
||||
|
||||
/// Dispatch the shader set that matches the parity of the frame index.
void Mipmaps::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const {
    const size_t parity = idx % 2;
    const auto& activeSet = this->sets.at(parity);
    activeSet.dispatch(vk, cmd, this->dispatchExtent);
}
|
||||
|
|
@ -1,47 +0,0 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "../helpers/managed_shader.hpp"
|
||||
#include "../helpers/utils.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
// NOTE(review): this forward-declares ::ctx::Ctx, whereas the class below refers to an
// unqualified `Ctx` from inside lsfgvk::backend — presumably the real type comes from
// ../helpers/utils.hpp. Confirm whether this declaration is still needed.
namespace ctx { struct Ctx; }
|
||||
|
||||
namespace lsfgvk::backend {
|
||||
/// mipmaps shaderchain
|
||||
class Mipmaps {
|
||||
public:
|
||||
/// create a mipmaps shaderchain
|
||||
/// @param ctx context
|
||||
/// @param sourceImages pair of source images
|
||||
Mipmaps(const Ctx& ctx,
|
||||
const std::pair<vk::Image, vk::Image>& sourceImages);
|
||||
|
||||
/// prepare the shaderchain initially
|
||||
/// @param images vector to fill with image handles
|
||||
void prepare(std::vector<VkImage>& images) const;
|
||||
|
||||
/// render the mipmaps shaderchain
|
||||
/// @param vk the vulkan instance
|
||||
/// @param cmd command buffer
|
||||
/// @param idx frame index
|
||||
void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const;
|
||||
|
||||
/// get the generated mipmap images
|
||||
/// @return vector of images
|
||||
[[nodiscard]] const auto& getImages() const { return this->images; }
|
||||
private:
|
||||
std::vector<vk::Image> images;
|
||||
|
||||
std::vector<ManagedShader> sets;
|
||||
VkExtent2D dispatchExtent{};
|
||||
};
|
||||
}
|
||||
578
lsfg-vk-backend/src/utility/pipelines.cpp
Normal file
578
lsfg-vk-backend/src/utility/pipelines.cpp
Normal file
|
|
@ -0,0 +1,578 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "pipelines.hpp"
|
||||
#include "modules/pipeline/signature.hpp"
|
||||
#include "modules/pipeline/signature/helpers.hpp"
|
||||
#include "modules/pipeline/signature/image.hpp"
|
||||
#include "modules/pipeline/signature/pass.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
using namespace lsfgvk;
|
||||
|
||||
namespace {
|
||||
using namespace lsfgvk::pipeline;
|
||||
|
||||
/// Build the pipeline signature
|
||||
consteval PipelineSignature buildPipelineSignature(bool perf) {
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wshadow"
|
||||
PipelineSignatureBuilder s;
|
||||
|
||||
const Resource INVALID{};
|
||||
|
||||
auto sourceImageArray = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.hdrFormat = Format::RGBA16161616,
|
||||
.flags = ImageFlag::Pinned |
|
||||
ImageFlag::ExternalInput |
|
||||
ImageFlag::HdrVariant,
|
||||
.count = 2
|
||||
});
|
||||
|
||||
/* Pre-pass */
|
||||
|
||||
auto mipmapImageArray = s.registerImage({
|
||||
.format = Format::R8,
|
||||
.flags = ImageFlag::Mipmaps,
|
||||
.extentOp = { true },
|
||||
.count = 7
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "mipmaps",
|
||||
.inputs{
|
||||
sourceImageArray
|
||||
},
|
||||
.outputs{
|
||||
mipmapImageArray
|
||||
},
|
||||
.dispatchOp = { 63, 6 }
|
||||
});
|
||||
|
||||
std::vector<size_t> alphaArray(7);
|
||||
std::vector<ExtentOp> alphaExtents(7);
|
||||
for (uint32_t i = 0; i < 7; i++) {
|
||||
const uint32_t mul = perf ? 1 : 2;
|
||||
const ExtentOp dispatch = { 7, 3 };
|
||||
|
||||
ExtentOp extent = { 0, 6 - i };
|
||||
extent += { 1, 1 };
|
||||
|
||||
auto flipflop0 = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.extentOp = extent,
|
||||
.count = 1 * mul
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "alpha0",
|
||||
.flags = PassFlag::Aggregate,
|
||||
.inputs{
|
||||
{ mipmapImageArray, 6 - i }
|
||||
},
|
||||
.outputs{
|
||||
flipflop0
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
auto flipflop1 = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.extentOp = extent,
|
||||
.count = 1 * mul
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "alpha1",
|
||||
.flags = PassFlag::Aggregate,
|
||||
.inputs{
|
||||
flipflop0
|
||||
},
|
||||
.outputs{
|
||||
flipflop1
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
extent += { 1, 1 };
|
||||
|
||||
auto flipflop2 = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.extentOp = extent,
|
||||
.count = 2 * mul
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "alpha2",
|
||||
.flags = PassFlag::Aggregate,
|
||||
.inputs{
|
||||
flipflop1
|
||||
},
|
||||
.outputs{
|
||||
flipflop2
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
auto result = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.flags = ImageFlag::Pinned,
|
||||
.extentOp = extent,
|
||||
.count = (2 * mul) * 3
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "alpha3",
|
||||
.flags = PassFlag::Aggregate,
|
||||
.inputs{
|
||||
flipflop2
|
||||
},
|
||||
.outputs{
|
||||
result
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
alphaArray.at(6 - i) = result;
|
||||
alphaExtents.at(6 - i) = extent;
|
||||
}
|
||||
|
||||
ExtentOp extent = alphaExtents.at(0);
|
||||
ExtentOp dispatch = { 7, 3 };
|
||||
|
||||
auto flipflop0 = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.extentOp = extent,
|
||||
.count = 2
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "beta0",
|
||||
.inputs{
|
||||
alphaArray.at(0)
|
||||
},
|
||||
.outputs{
|
||||
flipflop0
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
auto flipflop1 = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.extentOp = extent,
|
||||
.count = 2
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "beta1",
|
||||
.inputs{
|
||||
flipflop0
|
||||
},
|
||||
.outputs{
|
||||
flipflop1
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
auto flipflop2 = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.extentOp = extent,
|
||||
.count = 2
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "beta2",
|
||||
.inputs{
|
||||
flipflop1
|
||||
},
|
||||
.outputs{
|
||||
flipflop2
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
auto flipflop3 = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.extentOp = extent,
|
||||
.count = 2
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "beta3",
|
||||
.inputs{
|
||||
flipflop2
|
||||
},
|
||||
.outputs{
|
||||
flipflop3
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
auto betaImageArray = s.registerImage({
|
||||
.format = Format::R8,
|
||||
.flags = ImageFlag::Mipmaps,
|
||||
.extentOp = extent,
|
||||
.count = 6
|
||||
});
|
||||
|
||||
dispatch = { 31, 5 };
|
||||
|
||||
s.appendPass({
|
||||
.shader = "beta4",
|
||||
.inputs{
|
||||
flipflop3
|
||||
},
|
||||
.outputs{
|
||||
betaImageArray
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
/* Main-pass */
|
||||
|
||||
s.split();
|
||||
|
||||
std::vector<size_t> gammaArray(7);
|
||||
std::vector<size_t> deltaArray(3);
|
||||
std::vector<size_t> epsilonArray(3);
|
||||
for (uint32_t i = 0; i < 7; i++) {
|
||||
const uint32_t mul = perf ? 1 : 2;
|
||||
const ExtentOp dispatch = { 7, 3 };
|
||||
const ExtentOp extent = alphaExtents.at(6 - i);
|
||||
|
||||
auto flipflop0 = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.extentOp = extent,
|
||||
.count = 3
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "gamma0",
|
||||
.flags = PassFlag::Aggregate
|
||||
| (i == 0 ? PassFlag::Special : PassFlag::None),
|
||||
.inputs{
|
||||
alphaArray.at(6 - i),
|
||||
i == 0 ? INVALID : gammaArray.at(i - 1)
|
||||
},
|
||||
.outputs{
|
||||
flipflop0
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
auto flipflop1 = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.extentOp = extent,
|
||||
.count = 2 * mul
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "gamma1",
|
||||
.flags = PassFlag::Aggregate,
|
||||
.inputs{
|
||||
flipflop0
|
||||
},
|
||||
.outputs{
|
||||
flipflop1
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
auto flipflop2 = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.extentOp = extent,
|
||||
.count = 2 * mul
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "gamma2",
|
||||
.flags = PassFlag::Aggregate,
|
||||
.inputs{
|
||||
flipflop1
|
||||
},
|
||||
.outputs{
|
||||
flipflop2
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
auto flipflop3 = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.extentOp = extent,
|
||||
.count = 2 * mul
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "gamma3",
|
||||
.flags = PassFlag::Aggregate,
|
||||
.inputs{
|
||||
flipflop2
|
||||
},
|
||||
.outputs{
|
||||
flipflop3
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
auto result = s.registerImage({
|
||||
.format = Format::RGBA16161616,
|
||||
.extentOp = extent
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "gamma4",
|
||||
.flags = PassFlag::Aggregate
|
||||
| (i == 0 ? PassFlag::Special : PassFlag::None),
|
||||
.inputs{
|
||||
flipflop3,
|
||||
i == 0 ? INVALID : gammaArray.at(i - 1),
|
||||
{ betaImageArray, i == 0 ? 5 : (6 - i) }
|
||||
},
|
||||
.outputs{
|
||||
result
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
gammaArray.at(i) = result;
|
||||
|
||||
if (i >= 4) {
|
||||
auto flipflop0 = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.extentOp = extent,
|
||||
.count = 3
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "delta0",
|
||||
.flags = PassFlag::Aggregate
|
||||
| (i == 4 ? PassFlag::Special : PassFlag::None),
|
||||
.inputs{
|
||||
alphaArray.at(6 - i),
|
||||
i == 4 ? INVALID : deltaArray.at(i - 5)
|
||||
},
|
||||
.outputs{
|
||||
flipflop0
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
auto flipflop1 = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.extentOp = extent,
|
||||
.count = 2 * mul
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "delta1",
|
||||
.flags = PassFlag::Aggregate,
|
||||
.inputs{
|
||||
flipflop0
|
||||
},
|
||||
.outputs{
|
||||
flipflop1
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
auto flipflop2 = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.extentOp = extent,
|
||||
.count = 2 * mul
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "delta2",
|
||||
.flags = PassFlag::Aggregate,
|
||||
.inputs{
|
||||
flipflop1
|
||||
},
|
||||
.outputs{
|
||||
flipflop2
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
auto flipflop3 = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.extentOp = extent,
|
||||
.count = 2 * mul
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "delta3",
|
||||
.flags = PassFlag::Aggregate,
|
||||
.inputs{
|
||||
flipflop2
|
||||
},
|
||||
.outputs{
|
||||
flipflop3
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
auto result = s.registerImage({
|
||||
.format = Format::RGBA16161616,
|
||||
.extentOp = extent,
|
||||
.count = 1
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "delta4",
|
||||
.flags = PassFlag::Aggregate
|
||||
| (i == 4 ? PassFlag::Special : PassFlag::None),
|
||||
.inputs{
|
||||
flipflop3,
|
||||
i == 4 ? INVALID : deltaArray.at(i - 5),
|
||||
{ betaImageArray, 6 - i }
|
||||
},
|
||||
.outputs{
|
||||
result
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
deltaArray.at(i - 4) = result;
|
||||
}
|
||||
|
||||
if (i >= 4) {
|
||||
auto flipflop0 = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.extentOp = extent,
|
||||
.count = mul
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "epsilon0",
|
||||
.flags = PassFlag::Aggregate
|
||||
| (i == 4 ? PassFlag::Special : PassFlag::None),
|
||||
.inputs{
|
||||
alphaArray.at(6 - i),
|
||||
gammaArray.at(i - 1),
|
||||
i == 4 ? INVALID : deltaArray.at(i - 5)
|
||||
},
|
||||
.outputs{
|
||||
flipflop0
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
auto flipflop1 = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.extentOp = extent,
|
||||
.count = mul
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "epsilon1",
|
||||
.flags = PassFlag::Aggregate,
|
||||
.inputs{
|
||||
flipflop0
|
||||
},
|
||||
.outputs{
|
||||
flipflop1
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
auto flipflop2 = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.extentOp = extent,
|
||||
.count = mul
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "epsilon2",
|
||||
.flags = PassFlag::Aggregate,
|
||||
.inputs{
|
||||
flipflop1
|
||||
},
|
||||
.outputs{
|
||||
flipflop2
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
auto flipflop3 = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.extentOp = extent,
|
||||
.count = mul
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "epsilon3",
|
||||
.flags = PassFlag::Aggregate,
|
||||
.inputs{
|
||||
flipflop2
|
||||
},
|
||||
.outputs{
|
||||
flipflop3
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
auto result = s.registerImage({
|
||||
.format = Format::RGBA16161616,
|
||||
.extentOp = extent,
|
||||
.count = 1
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "epsilon4",
|
||||
.flags = PassFlag::Aggregate
|
||||
| (i == 4 ? PassFlag::Special : PassFlag::None),
|
||||
.inputs{
|
||||
flipflop3,
|
||||
i == 4 ? INVALID : epsilonArray.at(i - 5)
|
||||
},
|
||||
.outputs{
|
||||
result
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
epsilonArray.at(i - 4) = result;
|
||||
}
|
||||
}
|
||||
|
||||
extent = { false };
|
||||
dispatch = { 15, 4 };
|
||||
|
||||
auto result = s.registerImage({
|
||||
.format = Format::RGBA8888,
|
||||
.hdrFormat = Format::RGBA16161616,
|
||||
.flags = ImageFlag::Pinned
|
||||
| ImageFlag::ExternalOutput
|
||||
| ImageFlag::HdrVariant,
|
||||
.extentOp = extent,
|
||||
.count = 1
|
||||
});
|
||||
|
||||
s.appendPass({
|
||||
.shader = "generate",
|
||||
.flags = PassFlag::HdrVariant,
|
||||
.inputs{
|
||||
sourceImageArray,
|
||||
gammaArray.at(6),
|
||||
deltaArray.at(2),
|
||||
epsilonArray.at(2)
|
||||
},
|
||||
.outputs{
|
||||
result
|
||||
},
|
||||
.dispatchOp = extent + dispatch
|
||||
});
|
||||
|
||||
return s.finalize();
|
||||
#pragma clang diagnostic pop
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the pipeline signature for the requested mode.
/// Both variants are function-local statics, so the returned reference
/// stays valid for the rest of the program.
const PipelineSignature& lsfgvk::getPipelineSignature(bool perf) {
    static const PipelineSignature signature = buildPipelineSignature(false);
    static const PipelineSignature perfSignature = buildPipelineSignature(true);

    if (perf)
        return perfSignature;
    return signature;
}
|
||||
17
lsfg-vk-backend/src/utility/pipelines.hpp
Normal file
17
lsfg-vk-backend/src/utility/pipelines.hpp
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "modules/pipeline/signature.hpp"
|
||||
|
||||
namespace lsfgvk {
|
||||
|
||||
///
|
||||
/// Get the pipeline signature
|
||||
///
|
||||
/// @param perf Performance mode
|
||||
/// @return Pipeline signature
|
||||
///
|
||||
const pipeline::PipelineSignature& getPipelineSignature(bool perf);
|
||||
|
||||
}
|
||||
656
lsfg-vk-backend/src/utility/vkhelper.cpp
Normal file
656
lsfg-vk-backend/src/utility/vkhelper.cpp
Normal file
|
|
@ -0,0 +1,656 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "vkhelper.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <bitset>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <iomanip>
|
||||
#include <ios>
|
||||
#include <iostream>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <sstream>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
/* Device initialization */
|
||||
|
||||
/// Create the Vulkan instance used by lsfg-vk.
///
/// The dispatcher is initialized twice: once before the instance exists and
/// once more with the freshly created instance.
vk::UniqueInstance vkhelper::createInstance(vk::detail::DispatchLoaderDynamic& dld) {
    dld.init();

    // Application and engine share the same name and version
    const auto version{vk::makeVersion(2, 0, 0)};
    const vk::ApplicationInfo application{
        .pApplicationName = "lsfg-vk",
        .applicationVersion = version,
        .pEngineName = "lsfg-vk",
        .engineVersion = version,
        .apiVersion = vk::ApiVersion12 // Fully supported by all Vulkan-capable GPUs
    };
    const vk::InstanceCreateInfo createInfo{
        .pApplicationInfo = &application
    };

    auto created{vk::createInstanceUnique(createInfo, nullptr, dld)};
    dld.init(*created);
    return created;
}
|
||||
|
||||
/// Select a physical device by identifier.
///
/// The identifier is compared, in this order, against each device's
///  - device name,
///  - "vvvv:dddd" vendor/device ID pair (lowercase hex), and
///  - "dddd:bb:dd.f" PCI address (lowercase hex), if the device supports
///    VK_EXT_pci_bus_info.
/// An empty identifier selects the first enumerated device.
///
/// @param dld Dispatch loader
/// @param instance Vulkan instance to enumerate devices from
/// @param id Device identifier, may be empty
/// @return The first matching physical device
/// @throws std::runtime_error if no device matches
vk::PhysicalDevice vkhelper::findPhysicalDevice(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Instance& instance,
    const std::string& id
) {
    const auto physdevs{instance.enumeratePhysicalDevices(dld)};

    // Without an identifier the first device wins, so skip all per-device queries
    if (id.empty() && !physdevs.empty())
        return physdevs.front();

    for (const auto& physdev : physdevs) {
        // Check for VK_EXT_pci_bus_info
        const auto extensions{physdev.enumerateDeviceExtensionProperties(nullptr, dld)};
        const bool supportsPCIEXT{std::any_of(extensions.begin(), extensions.end(),
            [](const vk::ExtensionProperties& ext) {
                return std::string_view(ext.extensionName.data())
                    == vk::EXTPciBusInfoExtensionName;
            }
        )};

        // Fetch properties; the PCI struct is only chained when the extension is present
        vk::PhysicalDevicePCIBusInfoPropertiesEXT busInfo{};
        vk::PhysicalDeviceProperties2 info{
            .pNext = supportsPCIEXT ? &busInfo : nullptr
        };
        physdev.getProperties2(&info, dld);

        auto& props{info.properties};

        // Compare device name
        props.deviceName.back() = '\0'; // Ensure null-termination
        if (id == std::string(props.deviceName))
            return physdev;

        // Compare Vendor ID + Device ID
        std::ostringstream gpuss;
        gpuss << std::hex << std::setfill('0')
            << std::setw(4) << props.vendorID << ":"
            << std::setw(4) << props.deviceID;
        if (id == gpuss.str())
            return physdev;

        // Compare PCI bus ID
        if (!supportsPCIEXT)
            continue;

        std::ostringstream pciss;
        pciss << std::hex << std::setfill('0')
            << std::setw(4) << busInfo.pciDomain << ":"
            << std::setw(2) << busInfo.pciBus << ":"
            << std::setw(2) << busInfo.pciDevice << "."
            << std::setw(1) << busInfo.pciFunction;
        if (id == pciss.str())
            return physdev;
    }

    throw std::runtime_error("No physical device matching '" + id + "' found");
}
|
||||
|
||||
uint32_t vkhelper::findComputeQueueFamilyIndex(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::PhysicalDevice& physdev
|
||||
) {
|
||||
uint32_t idx{0};
|
||||
for (const auto& qfi : physdev.getQueueFamilyProperties2(dld)) {
|
||||
if (qfi.queueFamilyProperties.queueFlags & vk::QueueFlagBits::eCompute)
|
||||
return idx;
|
||||
idx++;
|
||||
}
|
||||
|
||||
throw std::runtime_error("No compute-capable queue family found");
|
||||
}
|
||||
|
||||
bool vkhelper::checkHalfPrecisionSupport(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::PhysicalDevice& physdev
|
||||
) {
|
||||
vk::PhysicalDeviceVulkan12Features featuresVulkan12{};
|
||||
vk::PhysicalDeviceFeatures2 features{
|
||||
.pNext = &featuresVulkan12
|
||||
};
|
||||
physdev.getFeatures2(&features, dld);
|
||||
return featuresVulkan12.shaderFloat16;
|
||||
}
|
||||
|
||||
/// Create the logical device together with its single compute queue.
///
/// Enables synchronization2, external memory/semaphore file descriptors and
/// timeline semaphores; half precision is enabled only if requested.
///
/// @param dld Dispatch loader, re-initialized with the created device
/// @param physdev Physical device to create the logical device on
/// @param qfi Queue family index to create the queue from
/// @param fp16 Whether to enable the shaderFloat16 feature
/// @return The device and queue 0 of the given family
std::pair<vk::UniqueDevice, vk::Queue> vkhelper::createDevice(
    vk::detail::DispatchLoaderDynamic& dld,
    const vk::PhysicalDevice& physdev,
    uint32_t qfi,
    bool fp16
) {
    constexpr std::array<const char*, 3> EXTENSIONS{
        vk::KHRSynchronization2ExtensionName,
        vk::KHRExternalMemoryFdExtensionName,
        vk::KHRExternalSemaphoreFdExtensionName
    };

    // Feature chain: deviceInfo -> vk12Info -> sync2Info
    vk::PhysicalDeviceSynchronization2FeaturesKHR sync2Info{
        .synchronization2 = VK_TRUE
    };
    const vk::PhysicalDeviceVulkan12Features vk12Info{
        .pNext = &sync2Info,
        .shaderFloat16 = fp16,
        .timelineSemaphore = VK_TRUE
    };
    const float queuePriority{1.0F}; // Highest priority
    const vk::DeviceQueueCreateInfo queueInfo{
        .queueFamilyIndex = qfi,
        .queueCount = 1,
        .pQueuePriorities = &queuePriority
    };
    const vk::DeviceCreateInfo deviceInfo{
        .pNext = &vk12Info,
        .queueCreateInfoCount = 1,
        .pQueueCreateInfos = &queueInfo,
        .enabledExtensionCount = static_cast<uint32_t>(EXTENSIONS.size()),
        .ppEnabledExtensionNames = EXTENSIONS.data()
    };
    auto device{physdev.createDeviceUnique(deviceInfo, nullptr, dld)};
    dld.init(*device);

    // Fetch the queue before `device` is moved into the result, so that no
    // expression reads `device` after std::move has been applied to it
    const auto queue{device->getQueue(qfi, 0, dld)};

    return {
        std::move(device),
        queue
    };
}
|
||||
|
||||
/* Shader modules & pipelines */
|
||||
|
||||
/// Create a shader module from a span of 32-bit code words.
vk::UniqueShaderModule vkhelper::createShaderModule(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const std::span<const uint32_t>& code
) {
    // codeSize is given in bytes, not in words
    const vk::ShaderModuleCreateInfo moduleInfo{
        .codeSize = code.size_bytes(),
        .pCode = code.data()
    };
    return device.createShaderModuleUnique(moduleInfo, nullptr, dld);
}
|
||||
|
||||
namespace {
|
||||
/// Find the cache file path
|
||||
std::filesystem::path findPipelineCache(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::PhysicalDevice& physdev,
|
||||
std::string_view tag
|
||||
) {
|
||||
// First find the base path
|
||||
std::filesystem::path path{"/tmp/lsfg-vk"};
|
||||
|
||||
const char* xdgCacheHome{std::getenv("XDG_CACHE_HOME")};
|
||||
if (xdgCacheHome && *xdgCacheHome != '\0')
|
||||
path = std::filesystem::path(xdgCacheHome) / "lsfg-vk";
|
||||
|
||||
const char* home{std::getenv("HOME")};
|
||||
if (home && *home != '\0')
|
||||
path = std::filesystem::path(home) / ".cache" / "lsfg-vk";
|
||||
|
||||
// Ensure the directory exists
|
||||
if (!std::filesystem::exists(path))
|
||||
std::filesystem::create_directories(path);
|
||||
|
||||
// Calculate the physical device UUID
|
||||
vk::PhysicalDeviceProperties2 info{};
|
||||
physdev.getProperties2(&info, dld);
|
||||
|
||||
std::ostringstream ss;
|
||||
ss << std::hex << std::setfill('0');
|
||||
for (uint32_t i = 0; i < 16; i++) {
|
||||
ss << std::setw(2) << static_cast<uint32_t>(info.properties.pipelineCacheUUID.at(i));
|
||||
if (i == 3 || i == 5 || i == 7 || i == 9) {
|
||||
ss << "-";
|
||||
}
|
||||
}
|
||||
|
||||
// Return the full path
|
||||
return path / ("cache_" + std::string(tag) + "_" + ss.str() + ".bin");
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<vk::UniquePipelineCache, bool> vkhelper::createPipelineCache(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const vk::PhysicalDevice& physdev,
|
||||
std::string_view tag
|
||||
) {
|
||||
const std::filesystem::path path{findPipelineCache(dld, physdev, tag)};
|
||||
const bool valid{std::filesystem::exists(path) && std::filesystem::file_size(path) > 32};
|
||||
|
||||
// Read cache data (if any)
|
||||
std::vector<uint8_t> cacheData{};
|
||||
if (std::filesystem::exists(path)) {
|
||||
std::ifstream file(path, std::ios::binary | std::ios::ate);
|
||||
if (!file.is_open())
|
||||
throw std::runtime_error("Unable to open pipeline cache file for reading");
|
||||
|
||||
const std::streamsize size{static_cast<std::streamsize>(file.tellg())};
|
||||
cacheData = std::vector<uint8_t>(static_cast<size_t>(size));
|
||||
|
||||
file.seekg(0, std::ios::beg);
|
||||
if (!file.read(reinterpret_cast<char*>(cacheData.data()), size)) // NOLINT (unsafe cast)
|
||||
throw std::runtime_error("Unable to read pipeline cache file");
|
||||
}
|
||||
|
||||
// Build pipeline cache
|
||||
const vk::PipelineCacheCreateInfo pipelineCacheInfo{
|
||||
.initialDataSize = cacheData.size(),
|
||||
.pInitialData = cacheData.data()
|
||||
};
|
||||
return { device.createPipelineCacheUnique(pipelineCacheInfo, nullptr, dld), valid };
|
||||
}
|
||||
|
||||
void vkhelper::persistPipelineCache(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const vk::PhysicalDevice& physdev,
|
||||
const vk::PipelineCache& cache,
|
||||
std::string_view tag
|
||||
) {
|
||||
const std::filesystem::path path{findPipelineCache(dld, physdev, tag)};
|
||||
|
||||
std::ofstream file(path, std::ios::binary | std::ios::trunc);
|
||||
if (!file.is_open())
|
||||
throw std::runtime_error("Unable to open pipeline cache file for writing");
|
||||
|
||||
const std::vector<uint8_t> cacheData{
|
||||
device.getPipelineCacheData(cache, dld)
|
||||
};
|
||||
file.write(
|
||||
reinterpret_cast<const char*>(cacheData.data()), // NOLINT (unsafe cast)
|
||||
static_cast<std::streamsize>(cacheData.size())
|
||||
);
|
||||
|
||||
file.flush();
|
||||
file.close();
|
||||
}
|
||||
|
||||
std::pair<vk::UniqueDescriptorSetLayout, vk::UniquePipelineLayout> vkhelper::createLayout(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
|
||||
size_t pushConstantSize
|
||||
) {
|
||||
const vk::DescriptorSetLayoutCreateInfo layoutInfo{
|
||||
.flags = vk::DescriptorSetLayoutCreateFlagBits::eUpdateAfterBindPool,
|
||||
.bindingCount = static_cast<uint32_t>(bindings.size()),
|
||||
.pBindings = bindings.data()
|
||||
};
|
||||
auto descriptorSetLayout{device.createDescriptorSetLayoutUnique(layoutInfo, nullptr, dld)};
|
||||
|
||||
const vk::PushConstantRange pushConstantRange{
|
||||
.stageFlags = vk::ShaderStageFlagBits::eCompute,
|
||||
.size = static_cast<uint32_t>(pushConstantSize)
|
||||
};
|
||||
const vk::PipelineLayoutCreateInfo pipelineLayoutInfo{
|
||||
.setLayoutCount = 1,
|
||||
.pSetLayouts = &*descriptorSetLayout,
|
||||
.pushConstantRangeCount = 1,
|
||||
.pPushConstantRanges = &pushConstantRange
|
||||
};
|
||||
auto pipelineLayout{device.createPipelineLayoutUnique(pipelineLayoutInfo, nullptr, dld)};
|
||||
|
||||
return { std::move(descriptorSetLayout), std::move(pipelineLayout) };
|
||||
}
|
||||
|
||||
/* Resources */
|
||||
|
||||
/// Create a single-mip, single-sample 2D image.
///
/// @param extent Width and height of the image
/// @param format Image format
/// @param layers Number of array layers
/// @param usage Image usage flags
vk::UniqueImage vkhelper::createImage(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    vk::Extent2D extent,
    vk::Format format,
    uint32_t layers,
    vk::ImageUsageFlags usage
) {
    // 2D images still need a 3D extent; the depth is always 1
    const vk::Extent3D volume{
        .width = extent.width,
        .height = extent.height,
        .depth = 1
    };
    const vk::ImageCreateInfo createInfo{
        .imageType = vk::ImageType::e2D,
        .format = format,
        .extent = volume,
        .mipLevels = 1,
        .arrayLayers = layers,
        .samples = vk::SampleCountFlagBits::e1,
        .usage = usage
    };
    return device.createImageUnique(createInfo, nullptr, dld);
}
|
||||
|
||||
/// Create a linearly filtering sampler.
///
/// @param mode Address mode applied to all three coordinates
/// @param compare Compare operation stored in the sampler
/// @param white Use an opaque white border instead of transparent black
vk::UniqueSampler vkhelper::createSampler(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    vk::SamplerAddressMode mode,
    vk::CompareOp compare,
    bool white
) {
    const vk::BorderColor border{white
        ? vk::BorderColor::eFloatOpaqueWhite
        : vk::BorderColor::eFloatTransparentBlack};

    const vk::SamplerCreateInfo createInfo{
        .magFilter = vk::Filter::eLinear,
        .minFilter = vk::Filter::eLinear,
        .mipmapMode = vk::SamplerMipmapMode::eLinear,
        .addressModeU = mode,
        .addressModeV = mode,
        .addressModeW = mode,
        .compareOp = compare,
        .maxLod = vk::LodClampNone,
        .borderColor = border
    };
    return device.createSamplerUnique(createInfo, nullptr, dld);
}
|
||||
|
||||
std::pair<vk::UniqueBuffer, vk::UniqueDeviceMemory> vkhelper::createBuffer(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const vk::PhysicalDevice& physdev,
|
||||
vk::BufferUsageFlags usage,
|
||||
const void* data,
|
||||
size_t size
|
||||
) {
|
||||
// Create buffer
|
||||
const vk::BufferCreateInfo bufferInfo{
|
||||
.size = size,
|
||||
.usage = usage,
|
||||
.sharingMode = vk::SharingMode::eExclusive
|
||||
};
|
||||
auto buffer{device.createBufferUnique(bufferInfo, nullptr, dld)};
|
||||
|
||||
// Allocate memory
|
||||
const auto requirements{device.getBufferMemoryRequirements(*buffer, dld)};
|
||||
|
||||
auto memory{vkhelper::allocateMemory(
|
||||
dld,
|
||||
device,
|
||||
physdev,
|
||||
requirements.size,
|
||||
requirements.memoryTypeBits,
|
||||
true
|
||||
)};
|
||||
|
||||
// Bind memory
|
||||
device.bindBufferMemory(*buffer, *memory, 0, dld);
|
||||
|
||||
// Copy data
|
||||
if (data) {
|
||||
void* mapped{device.mapMemory(*memory, 0, size, {}, dld)};
|
||||
std::copy_n(
|
||||
reinterpret_cast<const uint8_t*>(data), // NOLINT (unsafe cast)
|
||||
size,
|
||||
reinterpret_cast<uint8_t*>(mapped) // NOLINT (unsafe cast)
|
||||
);
|
||||
device.unmapMemory(*memory, dld);
|
||||
}
|
||||
|
||||
return {
|
||||
std::move(buffer),
|
||||
std::move(memory)
|
||||
};
|
||||
}
|
||||
|
||||
/* Memory allocations */
|
||||
|
||||
/// Allocate device memory from a suitable memory type.
///
/// Among the memory types allowed by `types` (and, if requested, both
/// host-visible and host-coherent) the first device-local one is used. If
/// none is device-local, the last suitable type is used instead.
///
/// @param size Size of the allocation in bytes
/// @param types Bitmask of allowed memory type indices
/// @param hostVisible Require host-visible, host-coherent memory
/// @throws std::runtime_error if no memory type is suitable
vk::UniqueDeviceMemory vkhelper::allocateMemory(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::PhysicalDevice& physdev,
    vk::DeviceSize size,
    std::bitset<32> types,
    bool hostVisible
) {
    const auto memProps{physdev.getMemoryProperties2(dld)};
    const auto& table = memProps.memoryProperties;

    const vk::MemoryPropertyFlags hostFlags =
        vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent;

    // Walk all memory types, remembering the most recent suitable one
    std::optional<uint32_t> chosen{};
    for (uint32_t idx = 0; idx < table.memoryTypeCount; ++idx) {
        if (!types.test(idx))
            continue;

        const vk::MemoryPropertyFlags flags = table.memoryTypes.at(idx).propertyFlags;
        const bool hostUsable = (flags & hostFlags) == hostFlags;
        if (hostVisible && !hostUsable)
            continue;

        chosen = idx;

        // Device-local memory is preferred, so stop at the first such type
        if (flags & vk::MemoryPropertyFlagBits::eDeviceLocal)
            break;
    }

    if (!chosen.has_value())
        throw std::runtime_error("No suitable memory type found for allocation");

    const vk::MemoryAllocateInfo allocInfo{
        .allocationSize = size,
        .memoryTypeIndex = *chosen
    };
    return device.allocateMemoryUnique(allocInfo, nullptr, dld);
}
|
||||
|
||||
/* Descriptors */
|
||||
|
||||
std::pair<vk::UniqueDescriptorPool, vk::DescriptorSet> vkhelper::createDescriptorSet(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const vk::DescriptorSetLayout& layout,
|
||||
uint32_t samplers, uint32_t buffers,
|
||||
uint32_t sampledImages, uint32_t storageImages
|
||||
) {
|
||||
const std::array<vk::DescriptorPoolSize, 4> poolSizes{{
|
||||
{ .type = vk::DescriptorType::eSampler,
|
||||
.descriptorCount = samplers },
|
||||
{ .type = vk::DescriptorType::eSampledImage,
|
||||
.descriptorCount = sampledImages },
|
||||
{ .type = vk::DescriptorType::eStorageImage,
|
||||
.descriptorCount = storageImages },
|
||||
{ .type = vk::DescriptorType::eUniformBuffer,
|
||||
.descriptorCount = buffers }
|
||||
}};
|
||||
auto pool{device.createDescriptorPoolUnique({
|
||||
.flags = vk::DescriptorPoolCreateFlagBits::eUpdateAfterBind,
|
||||
.maxSets = 1,
|
||||
.poolSizeCount = static_cast<uint32_t>(poolSizes.size()),
|
||||
.pPoolSizes = poolSizes.data()
|
||||
}, nullptr, dld)};
|
||||
|
||||
auto set{device.allocateDescriptorSets({
|
||||
.descriptorPool = *pool,
|
||||
.descriptorSetCount = 1,
|
||||
.pSetLayouts = &layout
|
||||
}, dld).at(0)};
|
||||
|
||||
return{
|
||||
std::move(pool),
|
||||
set
|
||||
};
|
||||
}
|
||||
|
||||
// Creates a color view over mip level 0 of `image`. A single layer yields a
// 2D view, any other layer count a 2D array view covering `layers` layers.
vk::UniqueImageView vkhelper::createImageView(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::Image& image,
    vk::Format format,
    uint32_t layers
) {
    vk::ImageViewType type{vk::ImageViewType::e2DArray};
    if (layers == 1)
        type = vk::ImageViewType::e2D;

    const vk::ImageSubresourceRange range{
        .aspectMask = vk::ImageAspectFlagBits::eColor,
        .levelCount = 1,
        .layerCount = layers
    };
    const vk::ImageViewCreateInfo createInfo{
        .image = image,
        .viewType = type,
        .format = format,
        .subresourceRange = range
    };
    return device.createImageViewUnique(createInfo, nullptr, dld);
}
|
||||
|
||||
/* Command buffers */
|
||||
|
||||
// Creates a command pool (no creation flags) for queue family `qfi`.
vk::UniqueCommandPool vkhelper::createCommandPool(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    uint32_t qfi
) {
    vk::CommandPoolCreateInfo poolInfo{};
    poolInfo.queueFamilyIndex = qfi;

    return device.createCommandPoolUnique(poolInfo, nullptr, dld);
}
|
||||
|
||||
// Allocates exactly one command buffer from `cmdpool` and hands ownership of
// it to the caller.
vk::UniqueCommandBuffer vkhelper::createCommandBuffer(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::CommandPool& cmdpool
) {
    vk::CommandBufferAllocateInfo allocateInfo{};
    allocateInfo.commandPool = cmdpool;
    allocateInfo.commandBufferCount = 1;

    // The API returns a vector; move the single element out of it
    auto allocated = device.allocateCommandBuffersUnique(allocateInfo, dld);
    return std::move(allocated.front());
}
|
||||
|
||||
// Creates a timeline semaphore. When `exportable` is set, an export info
// requesting the opaque-fd handle type is chained into the creation structures.
vk::UniqueSemaphore vkhelper::createTimelineSemaphore(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    bool exportable
) {
    // Optional tail of the pNext chain; must stay alive until the create call
    const vk::ExportSemaphoreCreateInfo fdExport{
        .handleTypes = vk::ExternalSemaphoreHandleTypeFlagBits::eOpaqueFd
    };
    const void* chainTail{nullptr};
    if (exportable)
        chainTail = &fdExport;

    const vk::SemaphoreTypeCreateInfo timelineInfo{
        .pNext = chainTail,
        .semaphoreType = vk::SemaphoreType::eTimeline
    };
    const vk::SemaphoreCreateInfo semaphoreInfo{
        .pNext = &timelineInfo
    };
    return device.createSemaphoreUnique(semaphoreInfo, nullptr, dld);
}
|
||||
|
||||
// Creates a fence using default creation parameters.
vk::UniqueFence vkhelper::createFence(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device
) {
    const vk::FenceCreateInfo fenceInfo{};
    return device.createFenceUnique(fenceInfo, nullptr, dld);
}
|
||||
|
||||
/* External memory */
|
||||
|
||||
std::pair<vk::UniqueImage, vk::UniqueDeviceMemory> vkhelper::createExternalImage(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const vk::PhysicalDevice& physdev,
|
||||
vk::Extent2D extent,
|
||||
vk::Format format,
|
||||
uint32_t layers,
|
||||
vk::ImageUsageFlags usage
|
||||
) {
|
||||
const vk::ExternalMemoryImageCreateInfo externalInfo{
|
||||
.handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eOpaqueFd
|
||||
};
|
||||
const vk::ImageCreateInfo imageInfo{
|
||||
.pNext = &externalInfo,
|
||||
.imageType = vk::ImageType::e2D,
|
||||
.format = format,
|
||||
.extent = {
|
||||
.width = extent.width,
|
||||
.height = extent.height,
|
||||
.depth = 1
|
||||
},
|
||||
.mipLevels = 1,
|
||||
.arrayLayers = layers,
|
||||
.samples = vk::SampleCountFlagBits::e1,
|
||||
.usage = usage
|
||||
};
|
||||
auto image{device.createImageUnique(imageInfo, nullptr, dld)};
|
||||
|
||||
// Find a suitable memory type index
|
||||
const auto memProps{physdev.getMemoryProperties2(dld)};
|
||||
const auto requirements{device.getImageMemoryRequirements(*image, dld)};
|
||||
|
||||
std::optional<uint32_t> selectedTypeIdx{};
|
||||
for (uint32_t i = 0; i < memProps.memoryProperties.memoryTypeCount; i++) {
|
||||
if (!std::bitset<32>(requirements.memoryTypeBits).test(i))
|
||||
continue;
|
||||
const auto& memType{memProps.memoryProperties.memoryTypes.at(i)};
|
||||
|
||||
if (memType.propertyFlags & vk::MemoryPropertyFlagBits::eDeviceLocal) {
|
||||
selectedTypeIdx = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!selectedTypeIdx)
|
||||
throw std::runtime_error("No suitable memory type found for allocation");
|
||||
|
||||
// Allocate memory
|
||||
const vk::MemoryDedicatedAllocateInfo dedicatedInfo{
|
||||
.image = *image,
|
||||
};
|
||||
const vk::ExportMemoryAllocateInfo exportInfo{
|
||||
.pNext = &dedicatedInfo,
|
||||
.handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eOpaqueFd
|
||||
};
|
||||
const vk::MemoryAllocateInfo allocInfo{
|
||||
.pNext = &exportInfo,
|
||||
.allocationSize = requirements.size,
|
||||
.memoryTypeIndex = *selectedTypeIdx
|
||||
};
|
||||
auto memory{device.allocateMemoryUnique(allocInfo, nullptr, dld)};
|
||||
|
||||
// Bind memory
|
||||
device.bindImageMemory(*image, *memory, 0, dld);
|
||||
|
||||
return{
|
||||
std::move(image),
|
||||
std::move(memory)
|
||||
};
|
||||
}
|
||||
|
||||
int vkhelper::exportMemoryFd(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const vk::DeviceMemory& memory
|
||||
) {
|
||||
const vk::MemoryGetFdInfoKHR fdInfo{
|
||||
.memory = memory,
|
||||
.handleType = vk::ExternalMemoryHandleTypeFlagBits::eOpaqueFd
|
||||
};
|
||||
return device.getMemoryFdKHR(fdInfo, dld);
|
||||
}
|
||||
|
||||
int vkhelper::exportSemaphoreFd(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const vk::Semaphore& semaphore
|
||||
) {
|
||||
const vk::SemaphoreGetFdInfoKHR fdInfo{
|
||||
.semaphore = semaphore,
|
||||
.handleType = vk::ExternalSemaphoreHandleTypeFlagBits::eOpaqueFd
|
||||
};
|
||||
return device.getSemaphoreFdKHR(fdInfo, dld);
|
||||
}
|
||||
449
lsfg-vk-backend/src/utility/vkhelper.hpp
Normal file
449
lsfg-vk-backend/src/utility/vkhelper.hpp
Normal file
|
|
@ -0,0 +1,449 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#pragma once
|
||||
|
||||
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||
#define VULKAN_HPP_TYPESAFE_CONVERSION 0
|
||||
#define VULKAN_HPP_NO_DEFAULT_DISPATCHER 1
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS 1
|
||||
#define VULKAN_HPP_NO_SETTERS 1
|
||||
#define VULKAN_HPP_NO_SPACESHIP_OPERATOR 1
|
||||
#define VULKAN_HPP_NO_TO_STRING 1
|
||||
#include <vulkan/vulkan.hpp> // IWYU pragma: export
|
||||
|
||||
// IWYU pragma: begin_exports
|
||||
#include <vulkan/vulkan_core.h>
|
||||
#include <vulkan/vulkan_enums.hpp>
|
||||
#include <vulkan/vulkan_funcs.hpp>
|
||||
#include <vulkan/vulkan_handles.hpp>
|
||||
#include <vulkan/vulkan_hpp_macros.hpp>
|
||||
#include <vulkan/vulkan_structs.hpp>
|
||||
// IWYU pragma: end_exports
|
||||
|
||||
#include <bitset>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <span>
|
||||
#include <string>
|
||||
#include <string_view>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
namespace vkhelper {
|
||||
|
||||
/* Device initialization */
|
||||
|
||||
///
|
||||
/// Create a Vulkan 1.2 instance for lsfg-vk
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @return RAII-wrapped Vulkan instance
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
vk::UniqueInstance createInstance(vk::detail::DispatchLoaderDynamic& dld);
|
||||
|
||||
///
|
||||
/// Find a physical device through a custom identifier
|
||||
///
|
||||
/// The custom identifier may be one of:
|
||||
/// - Device name (e.g. "NVIDIA GeForce RTX 5080")
|
||||
/// - Vendor ID + Device ID in lowercase hexadecimal (e.g. "10de:2c02")
|
||||
/// - PCI bus ID with padded zeroes (e.g. "0000:01:00.0")
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param instance Vulkan instance
|
||||
/// @param id Custom identifier
|
||||
/// @return Selected physical device
|
||||
/// @throws std::runtime_error if no suitable device found
|
||||
///
|
||||
vk::PhysicalDevice findPhysicalDevice(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Instance& instance,
|
||||
const std::string& id
|
||||
);
|
||||
|
||||
///
|
||||
/// Find the first compute-capable queue family index
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param physdev Physical device
|
||||
/// @return Queue family index
|
||||
/// @throws std::runtime_error if no compute-capable queue found
|
||||
///
|
||||
uint32_t findComputeQueueFamilyIndex(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::PhysicalDevice& physdev
|
||||
);
|
||||
|
||||
///
|
||||
/// Check a physical device for half-precision float support
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param physdev Physical device
|
||||
/// @return Whether half-precision float is supported
|
||||
///
|
||||
bool checkHalfPrecisionSupport(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::PhysicalDevice& physdev
|
||||
);
|
||||
|
||||
///
|
||||
/// Create a Vulkan device for lsfg-vk
|
||||
///
|
||||
/// This device will have the core features timelineSemaphore and shaderFloat16 (if requested)
|
||||
/// enabled, as well as the synchronization2, external memory & semaphore fd extensions.
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param physdev Physical device
|
||||
/// @param qfi Queue family index of compute-capable queue
|
||||
/// @param fp16 Whether to enable half-precision float support
|
||||
/// @return RAII-wrapped Vulkan device & compute queue
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
std::pair<vk::UniqueDevice, vk::Queue> createDevice(
|
||||
vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::PhysicalDevice& physdev,
|
||||
uint32_t qfi,
|
||||
bool fp16
|
||||
);
|
||||
|
||||
/* Shader modules & pipelines */
|
||||
|
||||
///
|
||||
/// Create a Vulkan shader module from SPIR-V bytecode
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param device Vulkan device
|
||||
/// @param code SPIR-V bytecode
|
||||
/// @return RAII-wrapped Vulkan shader module
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
vk::UniqueShaderModule createShaderModule(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const std::span<const uint32_t>& code
|
||||
);
|
||||
|
||||
///
|
||||
/// Create and maintain the Vulkan pipeline cache for lsfg-vk
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param device Vulkan device
|
||||
/// @param physdev Physical device
|
||||
/// @param tag Cache tag for different pipelines
|
||||
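/// @return RAII-wrapped Vulkan pipeline cache & a boolean flag (NOTE(review): the flag's meaning is not documented here - confirm against the implementation)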
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
std::pair<vk::UniquePipelineCache, bool> createPipelineCache(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const vk::PhysicalDevice& physdev,
|
||||
std::string_view tag
|
||||
);
|
||||
|
||||
///
|
||||
/// Persist the Vulkan pipeline cache to disk
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param device Vulkan device
|
||||
/// @param physdev Physical device
|
||||
/// @param cache Vulkan pipeline cache
|
||||
/// @param tag Cache tag for different pipelines
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
void persistPipelineCache(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const vk::PhysicalDevice& physdev,
|
||||
const vk::PipelineCache& cache,
|
||||
std::string_view tag
|
||||
);
|
||||
|
||||
///
|
||||
/// Create a Vulkan descriptor set layout and a pipeline layout
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param device Vulkan device
|
||||
/// @param bindings List of descriptor set layout bindings
|
||||
/// @param pushConstantSize Size of push constant range
|
||||
/// @return RAII-wrapped Vulkan descriptor set layout & pipeline layout
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
std::pair<vk::UniqueDescriptorSetLayout, vk::UniquePipelineLayout> createLayout(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
|
||||
size_t pushConstantSize
|
||||
);
|
||||
|
||||
/* Resources */
|
||||
|
||||
///
|
||||
/// Create a (unallocated) Vulkan image for lsfg-vk
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param device Vulkan device
|
||||
/// @param extent Image extent
|
||||
/// @param format Image format
|
||||
/// @param layers Amount of images
|
||||
/// @param usage Image usage flags
|
||||
/// @return RAII-wrapped Vulkan image
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
vk::UniqueImage createImage(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
vk::Extent2D extent,
|
||||
vk::Format format,
|
||||
uint32_t layers,
|
||||
vk::ImageUsageFlags usage
|
||||
);
|
||||
|
||||
///
|
||||
/// Create a Vulkan sampler for lsfg-vk
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param device Vulkan device
|
||||
/// @param mode Address mode
|
||||
/// @param compare Comparison mode
|
||||
/// @param white Black/White border color
|
||||
/// @return RAII-wrapped Vulkan sampler
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
vk::UniqueSampler createSampler(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
vk::SamplerAddressMode mode,
|
||||
vk::CompareOp compare,
|
||||
bool white
|
||||
);
|
||||
|
||||
// Forward declaration of the raw-pointer overload, which the templated createBuffer below calls
|
||||
std::pair<vk::UniqueBuffer, vk::UniqueDeviceMemory> createBuffer(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const vk::PhysicalDevice& physdev,
|
||||
vk::BufferUsageFlags usage,
|
||||
const void* data,
|
||||
size_t size
|
||||
);
|
||||
|
||||
///
|
||||
/// Create a Vulkan buffer for lsfg-vk
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param device Vulkan device
|
||||
/// @param physdev Physical device
|
||||
/// @param data Object whose bytes (sizeof(T)) are copied into the buffer
|
||||
/// @return RAII-wrapped Vulkan uniform buffer & device memory
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
template<typename T>
|
||||
std::pair<vk::UniqueBuffer, vk::UniqueDeviceMemory> createBuffer(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const vk::PhysicalDevice& physdev,
|
||||
const T& data
|
||||
) {
|
||||
return createBuffer(
|
||||
dld,
|
||||
device,
|
||||
physdev,
|
||||
vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eTransferDst,
|
||||
static_cast<const void*>(&data),
|
||||
sizeof(T)
|
||||
);
|
||||
}
|
||||
|
||||
/* Memory allocations */
|
||||
|
||||
///
|
||||
/// Create a Vulkan memory allocation
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param device Vulkan device
|
||||
/// @param physdev Physical device
|
||||
/// @param size Allocation size
|
||||
/// @param types Valid memory type bits
|
||||
/// @param hostVisible Require host visible memory
|
||||
/// @return RAII-wrapped Vulkan device memory
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
vk::UniqueDeviceMemory allocateMemory(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const vk::PhysicalDevice& physdev,
|
||||
vk::DeviceSize size,
|
||||
std::bitset<32> types,
|
||||
bool hostVisible = false
|
||||
);
|
||||
|
||||
///
|
||||
/// Align a memory allocation
|
||||
///
|
||||
/// @param size Memory size
|
||||
/// @param align Alignment (must be a power of two for the result to be correct)
|
||||
/// @return Aligned memory size
|
||||
///
|
||||
inline vk::DeviceSize align(vk::DeviceSize size, vk::DeviceSize align) noexcept {
    // Round `size` up to the next multiple of `align` by adding the low-bit mask
    // and clearing those bits again; this is only exact for power-of-two alignments.
    const vk::DeviceSize mask{align - 1};
    return (size + mask) & ~mask;
}
|
||||
|
||||
/* Descriptors */
|
||||
|
||||
///
|
||||
/// Create a Vulkan descriptor set for lsfg-vk
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param device Vulkan device
|
||||
/// @param layout Descriptor set layout
|
||||
/// @param samplers Amount of samplers
|
||||
/// @param buffers Amount of buffers
|
||||
/// @param sampledImages Amount of sampled images
|
||||
/// @param storageImages Amount of storage images
|
||||
/// @return Vulkan descriptor pool & set
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
std::pair<vk::UniqueDescriptorPool, vk::DescriptorSet> createDescriptorSet(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const vk::DescriptorSetLayout& layout,
|
||||
uint32_t samplers, uint32_t buffers,
|
||||
uint32_t sampledImages, uint32_t storageImages
|
||||
);
|
||||
|
||||
///
|
||||
/// Create an image view
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param device Vulkan device
|
||||
/// @param image Vulkan image
|
||||
/// @param format Image format
|
||||
/// @param layers Amount of layers in image
|
||||
/// @return RAII-wrapped Vulkan image view
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
vk::UniqueImageView createImageView(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const vk::Image& image,
|
||||
vk::Format format,
|
||||
uint32_t layers
|
||||
);
|
||||
|
||||
/* Command buffers */
|
||||
|
||||
///
|
||||
/// Create a Vulkan command pool for lsfg-vk
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param device Vulkan device
|
||||
/// @param qfi Queue family index
|
||||
/// @return RAII-wrapped Vulkan command pool
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
vk::UniqueCommandPool createCommandPool(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
uint32_t qfi
|
||||
);
|
||||
|
||||
///
|
||||
/// Create a Vulkan command buffer for lsfg-vk
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param device Vulkan device
|
||||
/// @param cmdpool Vulkan command pool
|
||||
/// @return RAII-wrapped Vulkan command buffer
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
vk::UniqueCommandBuffer createCommandBuffer(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const vk::CommandPool& cmdpool
|
||||
);
|
||||
|
||||
///
|
||||
/// Create a timeline semaphore
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param device Vulkan device
|
||||
/// @param exportable Whether the semaphore should be exportable as a fd
|
||||
/// @return RAII-wrapped Vulkan semaphore
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
vk::UniqueSemaphore createTimelineSemaphore(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
bool exportable = false
|
||||
);
|
||||
|
||||
///
|
||||
/// Create a fence
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param device Vulkan device
|
||||
/// @return RAII-wrapped Vulkan fence
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
vk::UniqueFence createFence(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device
|
||||
);
|
||||
|
||||
/* External memory */
|
||||
|
||||
///
|
||||
/// Create a Vulkan image with a fd-exportable dedicated allocation
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param device Vulkan device
|
||||
/// @param physdev Physical device
|
||||
/// @param extent Image extent
|
||||
/// @param format Image format
|
||||
/// @param layers Amount of array layers in the image
|
||||
/// @param usage Image usage flags
|
||||
/// @return RAII-wrapped Vulkan image & the dedicated device memory bound to it
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
std::pair<vk::UniqueImage, vk::UniqueDeviceMemory> createExternalImage(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const vk::PhysicalDevice& physdev,
|
||||
vk::Extent2D extent,
|
||||
vk::Format format,
|
||||
uint32_t layers,
|
||||
vk::ImageUsageFlags usage
|
||||
);
|
||||
|
||||
///
|
||||
/// Export a Vulkan memory allocation as a fd
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param device Vulkan device
|
||||
/// @param memory Vulkan device memory
|
||||
/// @return File descriptor to the allocation
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
int exportMemoryFd(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const vk::DeviceMemory& memory
|
||||
);
|
||||
|
||||
///
|
||||
/// Export a Vulkan semaphore as a fd
|
||||
///
|
||||
/// @param dld Dynamic dispatch loader
|
||||
/// @param device Vulkan device
|
||||
/// @param semaphore Vulkan semaphore
|
||||
/// @return File descriptor to the semaphore
|
||||
/// @throws std::runtime_error on failure
|
||||
///
|
||||
int exportSemaphoreFd(
|
||||
const vk::detail::DispatchLoaderDynamic& dld,
|
||||
const vk::Device& device,
|
||||
const vk::Semaphore& semaphore
|
||||
);
|
||||
|
||||
}
|
||||
|
|
@ -6,12 +6,14 @@ set(CLI_SOURCES
|
|||
|
||||
add_executable(lsfg-vk-cli ${CLI_SOURCES})
|
||||
|
||||
target_include_directories(lsfg-vk-cli SYSTEM
|
||||
PRIVATE thirdparty/include)
|
||||
|
||||
target_link_libraries(lsfg-vk-cli
|
||||
PUBLIC lsfg-vk-common
|
||||
PUBLIC lsfg-vk-backend)
|
||||
|
||||
target_compile_options(lsfg-vk-cli PRIVATE
|
||||
-Wno-unknown-warning-option
|
||||
-Wno-unsafe-buffer-usage) # CLI parsing
|
||||
|
||||
install(TARGETS lsfg-vk-cli
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@
|
|||
using namespace lsfgvk::cli;
|
||||
|
||||
namespace {
|
||||
/// print usage information
|
||||
/// Print usage information
|
||||
void usage(const std::string& prog) {
|
||||
std::cerr <<
|
||||
R"(Validate, benchmark, and debug lsfg-vk.
|
||||
|
|
@ -37,7 +37,7 @@ SUBCOMMAND OPTIONS:
|
|||
-c, --config <PATH> Optional path to the configuration file
|
||||
|
||||
benchmark & debug
|
||||
-d, --dll <PATH> Path to Lossless.dll
|
||||
-d, --dll <PATH> Path to lsfg-vk.dll
|
||||
-a, --allow-fp16 Allow FP16 acceleration
|
||||
-w, --width <INT> Width of the input frames
|
||||
-h, --height <INT> Height of the input frames
|
||||
|
|
@ -53,7 +53,7 @@ SUBCOMMAND OPTIONS:
|
|||
<folder> Path to the debug frames)" << '\n';
|
||||
}
|
||||
|
||||
/// parse the validate command options
|
||||
/// Parse the validate command options
|
||||
[[noreturn]] void on_validate(int argc, char** argv) {
|
||||
validate::Options opts{};
|
||||
|
||||
|
|
@ -83,7 +83,7 @@ SUBCOMMAND OPTIONS:
|
|||
std::exit(validate::run(opts));
|
||||
}
|
||||
|
||||
/// parse the benchmark command options
|
||||
/// Parse the benchmark command options
|
||||
[[noreturn]] void on_benchmark(int argc, char** argv) {
|
||||
benchmark::Options opts{};
|
||||
|
||||
|
|
@ -145,7 +145,7 @@ SUBCOMMAND OPTIONS:
|
|||
std::exit(benchmark::run(opts));
|
||||
}
|
||||
|
||||
/// parse the debug command options
|
||||
/// Parse the debug command options
|
||||
[[noreturn]] void on_debug(int argc, char** argv) {
|
||||
debug::Options opts{};
|
||||
|
||||
|
|
|
|||
|
|
@ -1,12 +1,12 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "benchmark.hpp"
|
||||
#include "lsfg-vk-backend/lsfgvk.hpp"
|
||||
#include "lsfg-vk-common/helpers/errors.hpp"
|
||||
#include "lsfg-vk-common/helpers/paths.hpp"
|
||||
#include "lsfg-vk-common/vulkan/image.hpp"
|
||||
#include "lsfg-vk-common/vulkan/timeline_semaphore.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
#include "lsfg-vk/lsfgvk.hpp"
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
|
|
@ -18,7 +18,6 @@
|
|||
#include <iostream>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include <time.h>
|
||||
|
|
@ -29,7 +28,7 @@ using namespace lsfgvk::cli;
|
|||
using namespace lsfgvk::cli::benchmark;
|
||||
|
||||
namespace {
|
||||
// get current time in milliseconds
|
||||
// Get current time in milliseconds
|
||||
uint64_t ms() {
|
||||
struct timespec ts{};
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
|
|
@ -41,29 +40,28 @@ namespace {
|
|||
|
||||
int benchmark::run(const Options& opts) {
|
||||
try {
|
||||
// parse options
|
||||
// Parse options
|
||||
if (opts.flow < 0.25F || opts.flow > 1.0F)
|
||||
throw ls::error("flow scale must be between 0.25 and 1.0");
|
||||
throw ls::error("Flow scale must be between 0.25 and 1.0");
|
||||
if (opts.multiplier < 2)
|
||||
throw ls::error("multiplier must be 2 or greater");
|
||||
throw ls::error("Multiplier must be 2 or greater");
|
||||
if (opts.width <= 0 || opts.height <= 0)
|
||||
throw ls::error("width and height must be positive integers");
|
||||
throw ls::error("Width and height must be positive integers");
|
||||
if (opts.duration <= 0)
|
||||
throw ls::error("duration must be a positive integer");
|
||||
throw ls::error("Duration must be a positive integer");
|
||||
const VkExtent2D extent{
|
||||
static_cast<uint32_t>(opts.width),
|
||||
static_cast<uint32_t>(opts.height)
|
||||
};
|
||||
|
||||
// create instance
|
||||
// Create instance
|
||||
std::string gpu_name{};
|
||||
|
||||
const vk::Vulkan vk{
|
||||
"lsfg-vk-debug", vk::version{2, 0, 0},
|
||||
"lsfg-vk-debug-engine", vk::version{2, 0, 0},
|
||||
[opts](const vk::VulkanInstanceFuncs fi,
|
||||
"lsfg-vk-debug", vk::version{2, 0, 0},
|
||||
[opts, gpu_name = &gpu_name](const vk::VulkanInstanceFuncs fi,
|
||||
const std::vector<VkPhysicalDevice>& devices) {
|
||||
if (!opts.gpu.has_value())
|
||||
return devices.front();
|
||||
|
||||
for (const VkPhysicalDevice& device : devices) {
|
||||
VkPhysicalDeviceProperties2 props{
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2
|
||||
|
|
@ -72,84 +70,81 @@ int benchmark::run(const Options& opts) {
|
|||
|
||||
auto& properties = props.properties;
|
||||
std::array<char, 256> devname = std::to_array(properties.deviceName);
|
||||
devname.at(255) = '\0'; // ensure null-termination
|
||||
devname.at(255) = '\0'; // Ensure null-termination
|
||||
|
||||
if (std::string(devname.data()) == *opts.gpu)
|
||||
if (!opts.gpu || std::string(devname.data()) == *opts.gpu) {
|
||||
*gpu_name = std::string(devname.data());
|
||||
return device;
|
||||
}
|
||||
}
|
||||
|
||||
throw ls::error("failed to find specified GPU: " + *opts.gpu);
|
||||
throw ls::error("Failed to find specified GPU: " + *opts.gpu);
|
||||
}
|
||||
};
|
||||
|
||||
std::pair<int, int> srcfds{};
|
||||
const vk::Image frame_0{vk,
|
||||
extent, VK_FORMAT_R8G8B8A8_UNORM,
|
||||
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
||||
std::nullopt, &srcfds.first};
|
||||
const vk::Image frame_1{vk,
|
||||
extent, VK_FORMAT_R8G8B8A8_UNORM,
|
||||
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
||||
std::nullopt, &srcfds.second};
|
||||
|
||||
std::vector<vk::Image> destimgs{};
|
||||
std::vector<int> destfds{};
|
||||
for (int i = 0; i < (opts.multiplier - 1); i++) {
|
||||
int fd{};
|
||||
destimgs.emplace_back(vk,
|
||||
extent, VK_FORMAT_R8G8B8A8_UNORM,
|
||||
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
||||
std::nullopt,
|
||||
&fd
|
||||
);
|
||||
destfds.push_back(fd);
|
||||
}
|
||||
|
||||
int syncfd{};
|
||||
const vk::TimelineSemaphore sync{vk, 0, std::nullopt, &syncfd};
|
||||
|
||||
// initialize backend
|
||||
// Initialize backend
|
||||
std::string dll{};
|
||||
if (opts.dll.has_value())
|
||||
dll = *opts.dll;
|
||||
else
|
||||
dll = ls::findShaderDll();
|
||||
|
||||
lsfgvk::backend::Instance lsfgvk{
|
||||
[opts](
|
||||
const std::string& gpu_name,
|
||||
std::pair<const std::string&, const std::string&>,
|
||||
const std::optional<std::string>&
|
||||
) {
|
||||
return opts.gpu.value_or(gpu_name) == gpu_name;
|
||||
},
|
||||
dll, opts.allow_fp16
|
||||
const lsfgvk::Instance lsfgvk{
|
||||
gpu_name,
|
||||
dll,
|
||||
opts.allow_fp16
|
||||
};
|
||||
lsfgvk::Context lsfgvk_ctx{
|
||||
lsfgvk,
|
||||
extent.width, extent.height,
|
||||
opts.flow, opts.performance_mode
|
||||
};
|
||||
lsfgvk::backend::Context& lsfgvk_ctx = lsfgvk.openContext(
|
||||
srcfds, destfds,
|
||||
syncfd, extent.width, extent.height,
|
||||
false, 1.0F / opts.flow, opts.performance_mode
|
||||
);
|
||||
|
||||
// run the benchmark
|
||||
// Import resources
|
||||
const auto fds{lsfgvk_ctx.exportFds()};
|
||||
|
||||
const vk::Image source{vk,
|
||||
extent,
|
||||
VK_FORMAT_R8G8B8A8_UNORM,
|
||||
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
||||
fds.sourceFd, std::nullopt, 2
|
||||
};
|
||||
const vk::Image destination{vk,
|
||||
extent,
|
||||
VK_FORMAT_R8G8B8A8_UNORM,
|
||||
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
||||
fds.destinationFd
|
||||
};
|
||||
const vk::TimelineSemaphore sync{vk,
|
||||
0,
|
||||
fds.syncFd
|
||||
};
|
||||
|
||||
// Run the benchmark
|
||||
const uint32_t total{static_cast<uint32_t>(opts.multiplier) - 1U};
|
||||
|
||||
size_t iterations{0};
|
||||
size_t generated_frames{0};
|
||||
size_t total_frames{1};
|
||||
size_t total_frames{0};
|
||||
size_t idx{1};
|
||||
|
||||
uint64_t print_time = ms() + 1000ULL;
|
||||
const uint64_t end_time = ms() + static_cast<uint64_t>(opts.duration) * 1000ULL;
|
||||
while (ms() < end_time) {
|
||||
sync.signal(vk, total_frames++);
|
||||
lsfgvk.scheduleFrames(lsfgvk_ctx);
|
||||
lsfgvk_ctx.dispatch(total);
|
||||
|
||||
for (size_t i = 0; i < destimgs.size(); i++) {
|
||||
auto success = sync.wait(vk, total_frames++);
|
||||
for (size_t i = 0; i < total; i++) {
|
||||
sync.signal(vk, idx++);
|
||||
|
||||
auto success = sync.wait(vk, idx++);
|
||||
if (!success)
|
||||
throw ls::error("failed to wait for frame");
|
||||
throw ls::error("Failed to wait for frame");
|
||||
|
||||
total_frames++;
|
||||
generated_frames++;
|
||||
}
|
||||
|
||||
total_frames++;
|
||||
iterations++;
|
||||
|
||||
if (ms() >= print_time) {
|
||||
|
|
@ -158,25 +153,25 @@ int benchmark::run(const Options& opts) {
|
|||
}
|
||||
}
|
||||
|
||||
// output results
|
||||
|
||||
// Output results
|
||||
std::cerr << (opts.duration < 40 ? "\r" : "\n");
|
||||
std::cerr << "benchmark results (ran for " << opts.duration << " seconds):\n";
|
||||
std::cerr << " iterations: " << iterations << "\n";
|
||||
std::cerr << " generated frames: " << generated_frames << "\n";
|
||||
std::cerr << " total frames: " << total_frames << "\n";
|
||||
std::cerr << "Benchmark results (ran for " << opts.duration << " seconds):\n";
|
||||
std::cerr << " Iterations: " << iterations << "\n";
|
||||
std::cerr << " Generated frames: " << generated_frames << "\n";
|
||||
std::cerr << " Total frames: " << total_frames << "\n";
|
||||
const auto time = static_cast<double>(opts.duration);
|
||||
const double fps_generated = static_cast<double>(generated_frames) / time;
|
||||
const double fps_total = static_cast<double>(total_frames) / time;
|
||||
std::cerr << std::setprecision(2) << std::fixed;
|
||||
std::cerr << " fps (generated): " << fps_generated << "fps\n";
|
||||
std::cerr << " fps (total): " << fps_total << "fps\n";
|
||||
std::cerr << " FPS (generated): " << fps_generated << "fps\n";
|
||||
std::cerr << " FPS (total): " << fps_total << "fps\n";
|
||||
|
||||
// Wait for idle
|
||||
lsfgvk_ctx.idle();
|
||||
|
||||
// deinitialize lsfg-vk
|
||||
lsfgvk.closeContext(lsfgvk_ctx);
|
||||
return EXIT_SUCCESS;
|
||||
} catch (const std::exception& e) {
|
||||
std::cerr << "error: " << e.what() << "\n";
|
||||
std::cerr << "Error: " << e.what() << "\n";
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,9 @@
|
|||
|
||||
namespace lsfgvk::cli::benchmark {
|
||||
|
||||
/// options for the "benchmark" command
|
||||
///
|
||||
/// Options for the "benchmark" command
|
||||
///
|
||||
struct Options {
|
||||
std::optional<std::string> dll;
|
||||
bool allow_fp16{false};
|
||||
|
|
@ -22,8 +24,12 @@ namespace lsfgvk::cli::benchmark {
|
|||
int duration{10};
|
||||
};
|
||||
|
||||
/// run the "benchmark" command
|
||||
/// @param opts the command options
|
||||
///
|
||||
/// Run the "benchmark" command
|
||||
///
|
||||
/// @param opts Command options
|
||||
/// @return Exit code
|
||||
///
|
||||
int run(const Options& opts);
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "debug.hpp"
|
||||
#include "lsfg-vk-backend/lsfgvk.hpp"
|
||||
#include "lsfg-vk-common/helpers/errors.hpp"
|
||||
#include "lsfg-vk-common/helpers/paths.hpp"
|
||||
#include "lsfg-vk-common/vulkan/buffer.hpp"
|
||||
|
|
@ -10,6 +9,9 @@
|
|||
#include "lsfg-vk-common/vulkan/timeline_semaphore.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#define LSFGVK_PRIV
|
||||
#include "lsfg-vk/lsfgvk.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
|
|
@ -22,25 +24,30 @@
|
|||
#include <iostream>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <thread>
|
||||
#include <vector>
|
||||
|
||||
#include <dlfcn.h>
|
||||
#include <renderdoc_app.h>
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
using namespace lsfgvk::cli;
|
||||
using namespace lsfgvk::cli::debug;
|
||||
|
||||
namespace {
|
||||
/// uploads an image from a dds file
|
||||
void upload_image(const vk::Vulkan& vk,
|
||||
const vk::Image& image, const std::string& path) {
|
||||
// read image bytecode
|
||||
/// Upload an image from a DDS file
|
||||
void uploadDDS(const vk::Vulkan& vk,
|
||||
const vk::Image& image,
|
||||
const std::string& path,
|
||||
uint32_t layer
|
||||
) {
|
||||
// Read image data
|
||||
std::ifstream file(path.data(), std::ios::binary | std::ios::ate);
|
||||
if (!file.is_open())
|
||||
throw ls::error("ifstream::ifstream() failed");
|
||||
|
||||
std::streamsize size = static_cast<std::streamsize>(file.tellg());
|
||||
size -= 124 + 4; // dds header and magic bytes
|
||||
size -= 124 + 4; // DDS header and magic bytes
|
||||
|
||||
std::vector<char> code(static_cast<size_t>(size));
|
||||
file.seekg(124 + 4, std::ios::beg);
|
||||
|
|
@ -49,13 +56,13 @@ namespace {
|
|||
|
||||
file.close();
|
||||
|
||||
// upload to image
|
||||
// Upload to image
|
||||
const vk::Buffer stagingbuf{vk, code.data(), code.size(),
|
||||
VK_BUFFER_USAGE_TRANSFER_SRC_BIT};
|
||||
|
||||
const vk::CommandBuffer cmdbuf{vk};
|
||||
cmdbuf.begin(vk);
|
||||
cmdbuf.copyBufferToImage(vk, stagingbuf, image);
|
||||
cmdbuf.copyBufferToImage(vk, stagingbuf, image, layer);
|
||||
cmdbuf.end(vk);
|
||||
|
||||
const vk::TimelineSemaphore sema{vk, 0};
|
||||
|
|
@ -65,19 +72,19 @@ namespace {
|
|||
|
||||
int debug::run(const Options& opts) {
|
||||
try {
|
||||
// parse options
|
||||
// Parse options
|
||||
if (opts.flow < 0.25F || opts.flow > 1.0F)
|
||||
throw ls::error("flow scale must be between 0.25 and 1.0");
|
||||
throw ls::error("Flow scale must be between 0.25 and 1.0");
|
||||
if (opts.multiplier < 2)
|
||||
throw ls::error("multiplier must be 2 or greater");
|
||||
throw ls::error("Multiplier must be 2 or greater");
|
||||
if (opts.width <= 0 || opts.height <= 0)
|
||||
throw ls::error("width and height must be positive integers");
|
||||
throw ls::error("Width and height must be positive integers");
|
||||
const VkExtent2D extent{
|
||||
static_cast<uint32_t>(opts.width),
|
||||
static_cast<uint32_t>(opts.height)
|
||||
};
|
||||
if (!std::filesystem::exists(opts.path))
|
||||
throw ls::error("debug path does not exist: " + opts.path.string());
|
||||
throw ls::error("Debug path does not exist: " + opts.path.string());
|
||||
std::vector<std::filesystem::path> paths{};
|
||||
for (const auto& entry : std::filesystem::directory_iterator(opts.path))
|
||||
paths.push_back(entry.path());
|
||||
|
|
@ -87,23 +94,22 @@ int debug::run(const Options& opts) {
|
|||
|
||||
auto norm_a = fa.find_first_of('.');
|
||||
if (norm_a == std::string::npos)
|
||||
throw ls::error("invalid debug file name: " + fa);
|
||||
throw ls::error("Invalid debug file name: " + fa);
|
||||
auto norm_b = fb.find_first_of('.');
|
||||
if (norm_b == std::string::npos)
|
||||
throw ls::error("invalid debug file name: " + fb);
|
||||
throw ls::error("Invalid debug file name: " + fb);
|
||||
|
||||
return std::stoi(fa.substr(0, norm_a)) < std::stoi(fb.substr(0, norm_b));
|
||||
});
|
||||
|
||||
// create instance
|
||||
// Create instance
|
||||
std::string gpu_name{};
|
||||
|
||||
const vk::Vulkan vk{
|
||||
"lsfg-vk-debug", vk::version{2, 0, 0},
|
||||
"lsfg-vk-debug-engine", vk::version{2, 0, 0},
|
||||
[opts](const vk::VulkanInstanceFuncs fi,
|
||||
"lsfg-vk-debug", vk::version{2, 0, 0},
|
||||
[opts, gpu_name = &gpu_name](const vk::VulkanInstanceFuncs fi,
|
||||
const std::vector<VkPhysicalDevice>& devices) {
|
||||
if (!opts.gpu.has_value())
|
||||
return devices.front();
|
||||
|
||||
for (const VkPhysicalDevice& device : devices) {
|
||||
VkPhysicalDeviceProperties2 props{
|
||||
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2
|
||||
|
|
@ -112,87 +118,108 @@ int debug::run(const Options& opts) {
|
|||
|
||||
auto& properties = props.properties;
|
||||
std::array<char, 256> devname = std::to_array(properties.deviceName);
|
||||
devname.at(255) = '\0'; // ensure null-termination
|
||||
devname.at(255) = '\0'; // Ensure null-termination
|
||||
|
||||
if (std::string(devname.data()) == *opts.gpu)
|
||||
if (!opts.gpu || std::string(devname.data()) == *opts.gpu) {
|
||||
*gpu_name = std::string(devname.data());
|
||||
return device;
|
||||
}
|
||||
}
|
||||
|
||||
throw ls::error("failed to find specified GPU: " + *opts.gpu);
|
||||
throw ls::error("Failed to find specified GPU: " + *opts.gpu);
|
||||
}
|
||||
};
|
||||
|
||||
std::pair<int, int> srcfds{};
|
||||
const vk::Image frame_0{vk,
|
||||
extent, VK_FORMAT_R8G8B8A8_UNORM,
|
||||
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
||||
std::nullopt, &srcfds.first};
|
||||
const vk::Image frame_1{vk,
|
||||
extent, VK_FORMAT_R8G8B8A8_UNORM,
|
||||
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
||||
std::nullopt, &srcfds.second};
|
||||
|
||||
std::vector<vk::Image> destimgs{};
|
||||
std::vector<int> destfds{};
|
||||
for (int i = 0; i < (opts.multiplier - 1); i++) {
|
||||
int fd{};
|
||||
destimgs.emplace_back(vk,
|
||||
extent, VK_FORMAT_R8G8B8A8_UNORM,
|
||||
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
||||
std::nullopt,
|
||||
&fd
|
||||
);
|
||||
destfds.push_back(fd);
|
||||
}
|
||||
|
||||
int syncfd{};
|
||||
const vk::TimelineSemaphore sync{vk, 0, std::nullopt, &syncfd};
|
||||
|
||||
// initialize backend
|
||||
// Initialize backend
|
||||
std::string dll{};
|
||||
if (opts.dll.has_value())
|
||||
dll = *opts.dll;
|
||||
else
|
||||
dll = ls::findShaderDll();
|
||||
lsfgvk::backend::Instance lsfgvk{
|
||||
[opts](
|
||||
const std::string& gpu_name,
|
||||
std::pair<const std::string&, const std::string&>,
|
||||
const std::optional<std::string>&
|
||||
) {
|
||||
return opts.gpu.value_or(gpu_name) == gpu_name;
|
||||
},
|
||||
dll, opts.allow_fp16
|
||||
};
|
||||
lsfgvk::backend::Context& lsfgvk_ctx = lsfgvk.openContext(
|
||||
srcfds, destfds,
|
||||
syncfd, extent.width, extent.height,
|
||||
false, 1.0F / opts.flow, opts.performance_mode
|
||||
);
|
||||
|
||||
// render destination images
|
||||
size_t idx{1};
|
||||
for (size_t j = 0; j < paths.size(); j++) {
|
||||
upload_image(vk,
|
||||
j % 2 == 0 ? frame_0 : frame_1,
|
||||
paths.at(j).string()
|
||||
const lsfgvk::Instance lsfgvk{
|
||||
gpu_name,
|
||||
dll,
|
||||
opts.allow_fp16
|
||||
};
|
||||
lsfgvk::Context lsfgvk_ctx{
|
||||
lsfgvk,
|
||||
extent.width, extent.height,
|
||||
opts.flow, opts.performance_mode
|
||||
};
|
||||
|
||||
// Import resources
|
||||
const auto fds{lsfgvk_ctx.exportFds()};
|
||||
|
||||
const vk::Image source{vk,
|
||||
extent,
|
||||
VK_FORMAT_R8G8B8A8_UNORM,
|
||||
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
||||
fds.sourceFd, std::nullopt, 2
|
||||
};
|
||||
const vk::Image destination{vk,
|
||||
extent,
|
||||
VK_FORMAT_R8G8B8A8_UNORM,
|
||||
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
||||
fds.destinationFd
|
||||
};
|
||||
const vk::TimelineSemaphore sync{vk,
|
||||
0,
|
||||
fds.syncFd
|
||||
};
|
||||
|
||||
// Try to open RenderDoc
|
||||
RENDERDOC_API_1_6_0* rdoc_api{nullptr};
|
||||
RENDERDOC_DevicePointer rdoc_device{nullptr};
|
||||
if (void* module = dlopen("librenderdoc.so", RTLD_NOW | RTLD_NOLOAD)) {
|
||||
void* func{dlsym(module, "RENDERDOC_GetAPI")};
|
||||
|
||||
auto* GetAPI{reinterpret_cast<pRENDERDOC_GetAPI>(func)}; // NOLINT (unsafe cast)
|
||||
GetAPI(
|
||||
eRENDERDOC_API_Version_1_0_0,
|
||||
reinterpret_cast<void**>(&rdoc_api) // NOLINT (unsafe cast)
|
||||
);
|
||||
|
||||
sync.signal(vk, idx++);
|
||||
lsfgvk.scheduleFrames(lsfgvk_ctx);
|
||||
|
||||
for (size_t i = 0; i < destimgs.size(); i++) {
|
||||
auto success = sync.wait(vk, idx++);
|
||||
if (!success)
|
||||
throw ls::error("failed to wait for frame");
|
||||
}
|
||||
rdoc_device = RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(lsfgvk._instance());
|
||||
}
|
||||
|
||||
// deinitialize lsfg-vk
|
||||
lsfgvk.closeContext(lsfgvk_ctx);
|
||||
// Render destination images
|
||||
const uint32_t total{static_cast<uint32_t>(opts.multiplier) - 1U};
|
||||
|
||||
size_t idx{1};
|
||||
for (size_t j = 0; j < paths.size(); j++) {
|
||||
uploadDDS(vk, source, paths.at(j).string(), j % 2);
|
||||
|
||||
if (rdoc_api) {
|
||||
rdoc_api->StartFrameCapture(rdoc_device, nullptr);
|
||||
}
|
||||
|
||||
std::thread signal_thread{[&sync, &vk, &idx, total] {
|
||||
for (size_t i = 0; i < total; i++) {
|
||||
sync.signal(vk, idx++);
|
||||
|
||||
auto success = sync.wait(vk, idx++);
|
||||
if (!success)
|
||||
throw ls::error("Failed to wait for frame");
|
||||
}
|
||||
}};
|
||||
|
||||
lsfgvk_ctx.dispatch(total);
|
||||
|
||||
if (rdoc_api) {
|
||||
lsfgvk_ctx.idle();
|
||||
rdoc_api->EndFrameCapture(rdoc_device, nullptr);
|
||||
}
|
||||
|
||||
signal_thread.join();
|
||||
}
|
||||
|
||||
// Wait for idle
|
||||
lsfgvk_ctx.idle();
|
||||
|
||||
return EXIT_SUCCESS;
|
||||
} catch (const std::exception& e) {
|
||||
std::cerr << "error: " << e.what() << "\n";
|
||||
std::cerr << "Error: " << e.what() << "\n";
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -8,23 +8,29 @@
|
|||
|
||||
namespace lsfgvk::cli::debug {
|
||||
|
||||
/// options for the "debug" command
|
||||
///
|
||||
/// Options for the "debug" command
|
||||
///
|
||||
struct Options {
|
||||
std::optional<std::string> dll;
|
||||
bool allow_fp16{true};
|
||||
bool allow_fp16{false};
|
||||
int width{1920};
|
||||
int height{1080};
|
||||
|
||||
float flow{0.85F};
|
||||
float flow{1.0F};
|
||||
int multiplier{2};
|
||||
bool performance_mode{true};
|
||||
bool performance_mode{false};
|
||||
std::optional<std::string> gpu;
|
||||
|
||||
std::filesystem::path path;
|
||||
};
|
||||
|
||||
/// run the "debug" command
|
||||
/// @param opts the command options
|
||||
///
|
||||
/// Run the "debug" command
|
||||
///
|
||||
/// @param opts Command options
|
||||
/// @return Exit code
|
||||
///
|
||||
int run(const Options& opts);
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ int validate::run(const Options& opts) {
|
|||
path = *opts.config;
|
||||
|
||||
if (!std::filesystem::exists(path)) {
|
||||
std::cerr << "Validation failed: configuration file does not exist\n";
|
||||
std::cerr << "Validation failed: Configuration file does not exist\n";
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7,13 +7,19 @@
|
|||
|
||||
namespace lsfgvk::cli::validate {
|
||||
|
||||
/// options for the "validate" command
|
||||
///
|
||||
/// Options for the "validate" command
|
||||
///
|
||||
struct Options {
|
||||
/// Configuration file to validate. When set, validate::run dereferences it
/// into the path it checks for existence before validating.
/// NOTE(review): how an unset value is resolved is outside the visible
/// hunk - presumably a default location is used; confirm in validate.cpp.
std::optional<std::string> config;
|
||||
};
|
||||
|
||||
/// run the "validate" command
|
||||
/// @param opts the command options
|
||||
///
|
||||
/// Run the "validate" command
|
||||
///
|
||||
/// @param opts Command options
|
||||
/// @return Exit code
|
||||
///
|
||||
int run(const Options& opts);
|
||||
|
||||
}
|
||||
|
|
|
|||
875
lsfg-vk-cli/thirdparty/include/renderdoc_app.h
vendored
Normal file
875
lsfg-vk-cli/thirdparty/include/renderdoc_app.h
vendored
Normal file
|
|
@ -0,0 +1,875 @@
|
|||
/******************************************************************************
|
||||
* The MIT License (MIT)
|
||||
*
|
||||
* Copyright (c) 2015-2026 Baldur Karlsson
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
* of this software and associated documentation files (the "Software"), to deal
|
||||
* in the Software without restriction, including without limitation the rights
|
||||
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the Software is
|
||||
* furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
* THE SOFTWARE.
|
||||
******************************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// Documentation for the API is available at https://renderdoc.org/docs/in_application_api.html
|
||||
//
|
||||
|
||||
#if !defined(RENDERDOC_NO_STDINT)
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
|
||||
#if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER)
|
||||
#define RENDERDOC_CC __cdecl
|
||||
#elif defined(__linux__) || defined(__FreeBSD__) || defined(__sun__) || defined(__OpenBSD__)
|
||||
#define RENDERDOC_CC
|
||||
#elif defined(__APPLE__)
|
||||
#define RENDERDOC_CC
|
||||
#else
|
||||
#error "Unknown platform"
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Constants not used directly in below API
|
||||
|
||||
// This is a GUID/magic value used for when applications pass a path where shader debug
|
||||
// information can be found to match up with a stripped shader.
|
||||
// the define can be used like so: const GUID RENDERDOC_ShaderDebugMagicValue =
|
||||
// RENDERDOC_ShaderDebugMagicValue_struct
|
||||
#define RENDERDOC_ShaderDebugMagicValue_struct \
|
||||
{ \
|
||||
0xeab25520, 0x6670, 0x4865, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff \
|
||||
}
|
||||
|
||||
// as an alternative when you want a byte array (assuming x86 endianness):
|
||||
#define RENDERDOC_ShaderDebugMagicValue_bytearray \
|
||||
{ \
|
||||
0x20, 0x55, 0xb2, 0xea, 0x70, 0x66, 0x65, 0x48, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff \
|
||||
}
|
||||
|
||||
// truncated version when only a uint64_t is available (e.g. Vulkan tags):
|
||||
#define RENDERDOC_ShaderDebugMagicValue_truncated 0x48656670eab25520ULL
|
||||
|
||||
// this is a magic value for vulkan user tags to indicate which dispatchable API objects are which
|
||||
// for object annotations
|
||||
#define RENDERDOC_APIObjectAnnotationHelper 0xfbb3b337b664d0adULL
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// RenderDoc capture options
|
||||
//
|
||||
|
||||
// Identifies a single capture-time setting. A value of this type is the `opt`
// argument of the SetCaptureOptionU32/F32 and GetCaptureOptionU32/F32 function
// pointer types declared directly after this enum.
//
// Three numeric values (2, 4 and 6) are shared by two enumerator names each;
// the second name is an alias kept alongside the first, so both spellings
// select the same option.
typedef enum RENDERDOC_CaptureOption
|
||||
{
|
||||
// Allow the application to enable vsync
|
||||
//
|
||||
// Default - enabled
|
||||
//
|
||||
// 1 - The application can enable or disable vsync at will
|
||||
// 0 - vsync is force disabled
|
||||
eRENDERDOC_Option_AllowVSync = 0,
|
||||
|
||||
// Allow the application to enable fullscreen
|
||||
//
|
||||
// Default - enabled
|
||||
//
|
||||
// 1 - The application can enable or disable fullscreen at will
|
||||
// 0 - fullscreen is force disabled
|
||||
eRENDERDOC_Option_AllowFullscreen = 1,
|
||||
|
||||
// Record API debugging events and messages
|
||||
//
|
||||
// Default - disabled
|
||||
//
|
||||
// 1 - Enable built-in API debugging features and records the results into
|
||||
// the capture, which is matched up with events on replay
|
||||
// 0 - no API debugging is forcibly enabled
|
||||
eRENDERDOC_Option_APIValidation = 2,
|
||||
eRENDERDOC_Option_DebugDeviceMode = 2, // deprecated name of this enum
|
||||
|
||||
// Capture CPU callstacks for API events
|
||||
//
|
||||
// Default - disabled
|
||||
//
|
||||
// 1 - Enables capturing of callstacks
|
||||
// 0 - no callstacks are captured
|
||||
eRENDERDOC_Option_CaptureCallstacks = 3,
|
||||
|
||||
// When capturing CPU callstacks, only capture them from actions.
|
||||
// This option does nothing without the above option being enabled
|
||||
//
|
||||
// Default - disabled
|
||||
//
|
||||
// 1 - Only captures callstacks for actions.
|
||||
// Ignored if CaptureCallstacks is disabled
|
||||
// 0 - Callstacks, if enabled, are captured for every event.
|
||||
//
|
||||
// Both names below have the value 4. NOTE(review): "OnlyDraws" is presumably
|
||||
// the older spelling, since the description speaks of "actions" - confirm.
eRENDERDOC_Option_CaptureCallstacksOnlyDraws = 4,
|
||||
eRENDERDOC_Option_CaptureCallstacksOnlyActions = 4,
|
||||
|
||||
// Specify a delay in seconds to wait for a debugger to attach, after
|
||||
// creating or injecting into a process, before continuing to allow it to run.
|
||||
//
|
||||
// 0 indicates no delay, and the process will run immediately after injection
|
||||
//
|
||||
// Default - 0 seconds
|
||||
//
|
||||
eRENDERDOC_Option_DelayForDebugger = 5,
|
||||
|
||||
// Verify buffer access. This includes checking the memory returned by a Map() call to
|
||||
// detect any out-of-bounds modification, as well as initialising buffers with undefined contents
|
||||
// to a marker value to catch use of uninitialised memory.
|
||||
//
|
||||
// NOTE: This option is only valid for OpenGL and D3D11. Explicit APIs such as D3D12 and Vulkan do
|
||||
// not do the same kind of interception & checking and undefined contents are really undefined.
|
||||
//
|
||||
// Default - disabled
|
||||
//
|
||||
// 1 - Verify buffer access
|
||||
// 0 - No verification is performed, and overwriting bounds may cause crashes or corruption in
|
||||
// RenderDoc.
|
||||
eRENDERDOC_Option_VerifyBufferAccess = 6,
|
||||
|
||||
// The old name for eRENDERDOC_Option_VerifyBufferAccess was eRENDERDOC_Option_VerifyMapWrites.
|
||||
// This option now controls the filling of uninitialised buffers with 0xdddddddd which was
|
||||
// previously always enabled
|
||||
//
|
||||
// Defined in terms of the new name, so both enumerators have the value 6.
eRENDERDOC_Option_VerifyMapWrites = eRENDERDOC_Option_VerifyBufferAccess,
|
||||
|
||||
// Hooks any system API calls that create child processes, and injects
|
||||
// RenderDoc into them recursively with the same options.
|
||||
//
|
||||
// Default - disabled
|
||||
//
|
||||
// 1 - Hooks into spawned child processes
|
||||
// 0 - Child processes are not hooked by RenderDoc
|
||||
eRENDERDOC_Option_HookIntoChildren = 7,
|
||||
|
||||
// By default RenderDoc only includes resources in the final capture necessary
|
||||
// for that frame, this allows you to override that behaviour.
|
||||
//
|
||||
// Default - disabled
|
||||
//
|
||||
// 1 - all live resources at the time of capture are included in the capture
|
||||
// and available for inspection
|
||||
// 0 - only the resources referenced by the captured frame are included
|
||||
eRENDERDOC_Option_RefAllResources = 8,
|
||||
|
||||
// **NOTE**: As of RenderDoc v1.1 this option has been deprecated. Setting or
|
||||
// getting it will be ignored, to allow compatibility with older versions.
|
||||
// In v1.1 the option acts as if it's always enabled.
|
||||
//
|
||||
// By default RenderDoc skips saving initial states for resources where the
|
||||
// previous contents don't appear to be used, assuming that writes before
|
||||
// reads indicate previous contents aren't used.
|
||||
//
|
||||
// Default - disabled
|
||||
//
|
||||
// 1 - initial contents at the start of each captured frame are saved, even if
|
||||
// they are later overwritten or cleared before being used.
|
||||
// 0 - unless a read is detected, initial contents will not be saved and will
|
||||
// appear as black or empty data.
|
||||
eRENDERDOC_Option_SaveAllInitials = 9,
|
||||
|
||||
// In APIs that allow for the recording of command lists to be replayed later,
|
||||
// RenderDoc may choose to not capture command lists before a frame capture is
|
||||
// triggered, to reduce overheads. This means any command lists recorded once
|
||||
// and replayed many times will not be available and may cause a failure to
|
||||
// capture.
|
||||
//
|
||||
// NOTE: This is only true for APIs where multithreading is difficult or
|
||||
// discouraged. Newer APIs like Vulkan and D3D12 will ignore this option
|
||||
// and always capture all command lists since the API is heavily oriented
|
||||
// around it and the overheads have been reduced by API design.
|
||||
//
|
||||
// 1 - All command lists are captured from the start of the application
|
||||
// 0 - Command lists are only captured if their recording begins during
|
||||
// the period when a frame capture is in progress.
|
||||
eRENDERDOC_Option_CaptureAllCmdLists = 10,
|
||||
|
||||
// Mute API debugging output when the API validation mode option is enabled
|
||||
//
|
||||
// Default - enabled
|
||||
//
|
||||
// 1 - Mute any API debug messages from being displayed or passed through
|
||||
// 0 - API debugging is displayed as normal
|
||||
eRENDERDOC_Option_DebugOutputMute = 11,
|
||||
|
||||
// Option to allow vendor extensions to be used even when they may be
|
||||
// incompatible with RenderDoc and cause corrupted replays or crashes.
|
||||
//
|
||||
// Default - inactive
|
||||
//
|
||||
// No values are documented, this option should only be used when absolutely
|
||||
// necessary as directed by a RenderDoc developer.
|
||||
eRENDERDOC_Option_AllowUnsupportedVendorExtensions = 12,
|
||||
|
||||
// Define a soft memory limit which some APIs may aim to keep overhead under where
|
||||
// possible. Anything above this limit will where possible be saved directly to disk during
|
||||
// capture.
|
||||
// This will cause increased disk space use (which may cause a capture to fail if disk space is
|
||||
// exhausted) as well as slower capture times.
|
||||
//
|
||||
// Not all memory allocations may be deferred like this so it is not a guarantee of a memory
|
||||
// limit.
|
||||
//
|
||||
// Units are in MBs, suggested values would range from 200MB to 1000MB.
|
||||
//
|
||||
// Default - 0 Megabytes
|
||||
eRENDERDOC_Option_SoftMemoryLimit = 13,
|
||||
} RENDERDOC_CaptureOption;
|
||||
|
||||
// Sets an option that controls how RenderDoc behaves on capture.
|
||||
//
|
||||
// Returns 1 if the option and value are valid
|
||||
// Returns 0 if either is invalid and the option is unchanged
|
||||
typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionU32)(RENDERDOC_CaptureOption opt, uint32_t val);
|
||||
typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionF32)(RENDERDOC_CaptureOption opt, float val);
|
||||
|
||||
// Gets the current value of an option as a uint32_t
|
||||
//
|
||||
// If the option is invalid, 0xffffffff is returned
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionU32)(RENDERDOC_CaptureOption opt);
|
||||
|
||||
// Gets the current value of an option as a float
|
||||
//
|
||||
// If the option is invalid, -FLT_MAX is returned
|
||||
typedef float(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionF32)(RENDERDOC_CaptureOption opt);
|
||||
|
||||
// Identifies a keyboard key. Arrays of these values are the `keys` argument of
// the SetFocusToggleKeys and SetCaptureKeys function pointer types declared
// directly after this enum.
//
// Digits and upper-case letters reuse their ASCII codes; all other keys are
// numbered sequentially above eRENDERDOC_Key_NonPrintable (0x100).
typedef enum RENDERDOC_InputButton
|
||||
{
|
||||
// '0' - '9' matches ASCII values
|
||||
eRENDERDOC_Key_0 = 0x30,
|
||||
eRENDERDOC_Key_1 = 0x31,
|
||||
eRENDERDOC_Key_2 = 0x32,
|
||||
eRENDERDOC_Key_3 = 0x33,
|
||||
eRENDERDOC_Key_4 = 0x34,
|
||||
eRENDERDOC_Key_5 = 0x35,
|
||||
eRENDERDOC_Key_6 = 0x36,
|
||||
eRENDERDOC_Key_7 = 0x37,
|
||||
eRENDERDOC_Key_8 = 0x38,
|
||||
eRENDERDOC_Key_9 = 0x39,
|
||||
|
||||
// 'A' - 'Z' matches ASCII values
|
||||
eRENDERDOC_Key_A = 0x41,
|
||||
eRENDERDOC_Key_B = 0x42,
|
||||
eRENDERDOC_Key_C = 0x43,
|
||||
eRENDERDOC_Key_D = 0x44,
|
||||
eRENDERDOC_Key_E = 0x45,
|
||||
eRENDERDOC_Key_F = 0x46,
|
||||
eRENDERDOC_Key_G = 0x47,
|
||||
eRENDERDOC_Key_H = 0x48,
|
||||
eRENDERDOC_Key_I = 0x49,
|
||||
eRENDERDOC_Key_J = 0x4A,
|
||||
eRENDERDOC_Key_K = 0x4B,
|
||||
eRENDERDOC_Key_L = 0x4C,
|
||||
eRENDERDOC_Key_M = 0x4D,
|
||||
eRENDERDOC_Key_N = 0x4E,
|
||||
eRENDERDOC_Key_O = 0x4F,
|
||||
eRENDERDOC_Key_P = 0x50,
|
||||
eRENDERDOC_Key_Q = 0x51,
|
||||
eRENDERDOC_Key_R = 0x52,
|
||||
eRENDERDOC_Key_S = 0x53,
|
||||
eRENDERDOC_Key_T = 0x54,
|
||||
eRENDERDOC_Key_U = 0x55,
|
||||
eRENDERDOC_Key_V = 0x56,
|
||||
eRENDERDOC_Key_W = 0x57,
|
||||
eRENDERDOC_Key_X = 0x58,
|
||||
eRENDERDOC_Key_Y = 0x59,
|
||||
eRENDERDOC_Key_Z = 0x5A,
|
||||
|
||||
// leave the rest of the ASCII range free
|
||||
// in case we want to use it later
|
||||
//
|
||||
// Every enumerator after this one has no explicit value, so they count
|
||||
// upward from 0x101 in declaration order.
eRENDERDOC_Key_NonPrintable = 0x100,
|
||||
|
||||
eRENDERDOC_Key_Divide,
|
||||
eRENDERDOC_Key_Multiply,
|
||||
eRENDERDOC_Key_Subtract,
|
||||
eRENDERDOC_Key_Plus,
|
||||
|
||||
eRENDERDOC_Key_F1,
|
||||
eRENDERDOC_Key_F2,
|
||||
eRENDERDOC_Key_F3,
|
||||
eRENDERDOC_Key_F4,
|
||||
eRENDERDOC_Key_F5,
|
||||
eRENDERDOC_Key_F6,
|
||||
eRENDERDOC_Key_F7,
|
||||
eRENDERDOC_Key_F8,
|
||||
eRENDERDOC_Key_F9,
|
||||
eRENDERDOC_Key_F10,
|
||||
eRENDERDOC_Key_F11,
|
||||
eRENDERDOC_Key_F12,
|
||||
|
||||
eRENDERDOC_Key_Home,
|
||||
eRENDERDOC_Key_End,
|
||||
eRENDERDOC_Key_Insert,
|
||||
eRENDERDOC_Key_Delete,
|
||||
eRENDERDOC_Key_PageUp,
|
||||
eRENDERDOC_Key_PageDn,
|
||||
|
||||
eRENDERDOC_Key_Backspace,
|
||||
eRENDERDOC_Key_Tab,
|
||||
eRENDERDOC_Key_PrtScrn,
|
||||
eRENDERDOC_Key_Pause,
|
||||
|
||||
// Last enumerator. NOTE(review): presumably an end-of-range sentinel rather
|
||||
// than a real key - confirm against the RenderDoc documentation.
eRENDERDOC_Key_Max,
|
||||
} RENDERDOC_InputButton;
|
||||
|
||||
// Sets which key or keys can be used to toggle focus between multiple windows
|
||||
//
|
||||
// If keys is NULL or num is 0, toggle keys will be disabled
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_SetFocusToggleKeys)(RENDERDOC_InputButton *keys, int num);
|
||||
|
||||
// Sets which key or keys can be used to capture the next frame
|
||||
//
|
||||
// If keys is NULL or num is 0, capture keys will be disabled
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureKeys)(RENDERDOC_InputButton *keys, int num);
|
||||
|
||||
// Bit flags describing what RenderDoc's in-application overlay shows. The
// individual flags are distinct powers of two so they can be OR'd together into
// a uint32_t mask, which is what the GetOverlayBits / MaskOverlayBits function
// pointer types declared directly after this enum read and modify.
typedef enum RENDERDOC_OverlayBits
|
||||
{
|
||||
// This single bit controls whether the overlay is enabled or disabled globally
|
||||
eRENDERDOC_Overlay_Enabled = 0x1,
|
||||
|
||||
// Show the average framerate over several seconds as well as min/max
|
||||
eRENDERDOC_Overlay_FrameRate = 0x2,
|
||||
|
||||
// Show the current frame number
|
||||
eRENDERDOC_Overlay_FrameNumber = 0x4,
|
||||
|
||||
// Show a list of recent captures, and how many captures have been made
|
||||
eRENDERDOC_Overlay_CaptureList = 0x8,
|
||||
|
||||
// Default values for the overlay mask
|
||||
// (the OR of the four individual flags above, i.e. 0xF)
eRENDERDOC_Overlay_Default = (eRENDERDOC_Overlay_Enabled | eRENDERDOC_Overlay_FrameRate |
|
||||
eRENDERDOC_Overlay_FrameNumber | eRENDERDOC_Overlay_CaptureList),
|
||||
|
||||
// Enable all bits
|
||||
// (0x7ffffff sets the low 27 bits, a superset of the four flags defined here)
eRENDERDOC_Overlay_All = 0x7ffffff,
|
||||
|
||||
// Disable all bits
|
||||
eRENDERDOC_Overlay_None = 0,
|
||||
} RENDERDOC_OverlayBits;
|
||||
|
||||
// returns the overlay bits that have been set
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetOverlayBits)(void);
|
||||
// sets the overlay bits using an AND mask and an OR mask
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_MaskOverlayBits)(uint32_t And, uint32_t Or);
|
||||
|
||||
// this function will attempt to remove RenderDoc's hooks in the application.
|
||||
//
|
||||
// Note that this can only work correctly if done immediately after
|
||||
// the module is loaded, before any API work happens. RenderDoc will remove its
|
||||
// injected hooks and shut down. Behaviour is undefined if this is called
|
||||
// after any API functions have been called, and there is still no guarantee of
|
||||
// success.
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_RemoveHooks)(void);
|
||||
|
||||
// DEPRECATED: compatibility for code compiled against pre-1.4.1 headers.
|
||||
typedef pRENDERDOC_RemoveHooks pRENDERDOC_Shutdown;
|
||||
|
||||
// This function will unload RenderDoc's crash handler.
|
||||
//
|
||||
// If you use your own crash handler and don't want RenderDoc's handler to
|
||||
// intercede, you can call this function to unload it and any unhandled
|
||||
// exceptions will pass to the next handler.
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_UnloadCrashHandler)(void);
|
||||
|
||||
// Sets the capture file path template
|
||||
//
|
||||
// pathtemplate is a UTF-8 string that gives a template for how captures will be named
|
||||
// and where they will be saved.
|
||||
//
|
||||
// Any extension is stripped off the path, and captures are saved in the directory
|
||||
// specified, and named with the filename and the frame number appended. If the
|
||||
// directory does not exist it will be created, including any parent directories.
|
||||
//
|
||||
// If pathtemplate is NULL, the template will remain unchanged
|
||||
//
|
||||
// Example:
|
||||
//
|
||||
// SetCaptureFilePathTemplate("my_captures/example");
|
||||
//
|
||||
// Capture #1 -> my_captures/example_frame123.rdc
|
||||
// Capture #2 -> my_captures/example_frame456.rdc
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFilePathTemplate)(const char *pathtemplate);
|
||||
|
||||
// returns the current capture path template, see SetCaptureFilePathTemplate above, as a UTF-8 string
|
||||
typedef const char *(RENDERDOC_CC *pRENDERDOC_GetCaptureFilePathTemplate)(void);
|
||||
|
||||
// DEPRECATED: compatibility for code compiled against pre-1.1.2 headers.
|
||||
typedef pRENDERDOC_SetCaptureFilePathTemplate pRENDERDOC_SetLogFilePathTemplate;
|
||||
typedef pRENDERDOC_GetCaptureFilePathTemplate pRENDERDOC_GetLogFilePathTemplate;
|
||||
|
||||
// returns the number of captures that have been made
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetNumCaptures)(void);
|
||||
|
||||
// This function returns the details of a capture, by index. New captures are added
|
||||
// to the end of the list.
|
||||
//
|
||||
// filename will be filled with the absolute path to the capture file, as a UTF-8 string
|
||||
// pathlength will be written with the length in bytes of the filename string
|
||||
// timestamp will be written with the time of the capture, in seconds since the Unix epoch
|
||||
//
|
||||
// Any of the parameters can be NULL and they'll be skipped.
|
||||
//
|
||||
// The function will return 1 if the capture index is valid, or 0 if the index is invalid
|
||||
// If the index is invalid, the values will be unchanged
|
||||
//
|
||||
// Note: when captures are deleted in the UI they will remain in this list, so the
|
||||
// capture path may not exist anymore.
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCapture)(uint32_t idx, char *filename,
|
||||
uint32_t *pathlength, uint64_t *timestamp);
|
||||
|
||||
// Sets the comments associated with a capture file. These comments are displayed in the
|
||||
// UI program when opening.
|
||||
//
|
||||
// filePath should be a path to the capture file to add comments to. If set to NULL or ""
|
||||
// the most recently created capture file will be used instead.
|
||||
// comments should be a NULL-terminated UTF-8 string to add as comments.
|
||||
//
|
||||
// Any existing comments will be overwritten.
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFileComments)(const char *filePath,
|
||||
const char *comments);
|
||||
|
||||
// returns 1 if the RenderDoc UI is connected to this application, 0 otherwise
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsTargetControlConnected)(void);
|
||||
|
||||
// DEPRECATED: compatibility for code compiled against pre-1.1.1 headers.
|
||||
// This was renamed to IsTargetControlConnected in API 1.1.1, the old typedef is kept here for
|
||||
// backwards compatibility with old code, it is castable either way since it's ABI compatible
|
||||
// as the same function pointer type.
|
||||
typedef pRENDERDOC_IsTargetControlConnected pRENDERDOC_IsRemoteAccessConnected;
|
||||
|
||||
// This function will launch the Replay UI associated with the RenderDoc library injected
|
||||
// into the running application.
|
||||
//
|
||||
// if connectTargetControl is 1, the Replay UI will be launched with a command line parameter
|
||||
// to connect to this application
|
||||
// cmdline is the rest of the command line, as a UTF-8 string. E.g. a capture to open
|
||||
// if cmdline is NULL, the command line will be empty.
|
||||
//
|
||||
// returns the PID of the replay UI if successful, 0 if not successful.
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_LaunchReplayUI)(uint32_t connectTargetControl,
|
||||
const char *cmdline);
|
||||
|
||||
// RenderDoc can return a higher version than requested if it's backwards compatible,
|
||||
// this function returns the actual version returned. If a parameter is NULL, it will be
|
||||
// ignored and the others will be filled out.
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_GetAPIVersion)(int *major, int *minor, int *patch);
|
||||
|
||||
// Requests that the replay UI show itself (if hidden or not the current top window). This can be
|
||||
// used in conjunction with IsTargetControlConnected and LaunchReplayUI to intelligently handle
|
||||
// showing the UI after making a capture.
|
||||
//
|
||||
// This will return 1 if the request was successfully passed on, though it's not guaranteed that
|
||||
// the UI will be on top in all cases depending on OS rules. It will return 0 if there is no current
|
||||
// target control connection to make such a request, or if there was another error
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)(void);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// Capturing functions
|
||||
//
|
||||
|
||||
// A device pointer is a pointer to the API's root handle.
|
||||
//
|
||||
// This would be an ID3D11Device, HGLRC/GLXContext, ID3D12Device, etc
|
||||
typedef void *RENDERDOC_DevicePointer;
|
||||
|
||||
// A window handle is the OS's native window handle
|
||||
//
|
||||
// This would be an HWND, GLXDrawable, etc
|
||||
typedef void *RENDERDOC_WindowHandle;
|
||||
|
||||
// A helper macro for Vulkan, where the device handle cannot be used directly.
|
||||
//
|
||||
// Passing the VkInstance to this macro will return the RENDERDOC_DevicePointer to use.
|
||||
//
|
||||
// Specifically, the value needed is the dispatch table pointer, which sits as the first
|
||||
// pointer-sized object in the memory pointed to by the VkInstance. Thus we cast to a void** and
|
||||
// indirect once.
|
||||
#define RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(inst) (*((void **)(inst)))
|
||||
|
||||
// This sets the RenderDoc in-app overlay in the API/window pair as 'active' and it will
|
||||
// respond to keypresses. Neither parameter can be NULL
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_SetActiveWindow)(RENDERDOC_DevicePointer device,
|
||||
RENDERDOC_WindowHandle wndHandle);
|
||||
|
||||
// capture the next frame on whichever window and API is currently considered active
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_TriggerCapture)(void);
|
||||
|
||||
// capture the next N frames on whichever window and API is currently considered active
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_TriggerMultiFrameCapture)(uint32_t numFrames);
|
||||
|
||||
// When choosing either a device pointer or a window handle to capture, you can pass NULL.
|
||||
// Passing NULL specifies a 'wildcard' match against anything. This allows you to specify
|
||||
// any API rendering to a specific window, or a specific API instance rendering to any window,
|
||||
// or in the simplest case of one window and one API, you can just pass NULL for both.
|
||||
//
|
||||
// In either case, if there are two or more possible matching (device,window) pairs it
|
||||
// is undefined which one will be captured.
|
||||
//
|
||||
// Note: for headless rendering you can pass NULL for the window handle and either specify
|
||||
// a device pointer or leave it NULL as above.
|
||||
|
||||
// Immediately starts capturing API calls on the specified device pointer and window handle.
|
||||
//
|
||||
// If there is no matching thing to capture (e.g. no supported API has been initialised),
|
||||
// this will do nothing.
|
||||
//
|
||||
// The results are undefined (including crashes) if two captures are started overlapping,
|
||||
// even on separate devices and/or windows.
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_StartFrameCapture)(RENDERDOC_DevicePointer device,
|
||||
RENDERDOC_WindowHandle wndHandle);
|
||||
|
||||
// Returns whether or not a frame capture is currently ongoing anywhere.
|
||||
//
|
||||
// This will return 1 if a capture is ongoing, and 0 if there is no capture running
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsFrameCapturing)(void);
|
||||
|
||||
// Ends capturing immediately.
|
||||
//
|
||||
// This will return 1 if the capture succeeded, and 0 if there was an error capturing.
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_EndFrameCapture)(RENDERDOC_DevicePointer device,
|
||||
RENDERDOC_WindowHandle wndHandle);
|
||||
|
||||
// Ends capturing immediately and discard any data stored without saving to disk.
|
||||
//
|
||||
// This will return 1 if the capture was discarded, and 0 if there was an error or no capture
|
||||
// was in progress
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_DiscardFrameCapture)(RENDERDOC_DevicePointer device,
|
||||
RENDERDOC_WindowHandle wndHandle);
|
||||
|
||||
// Only valid to be called between a call to StartFrameCapture and EndFrameCapture. Gives a custom
|
||||
// title to the capture produced which will be displayed in the UI.
|
||||
//
|
||||
// If multiple captures are ongoing, this title will be applied to the first capture to end after
|
||||
// this call. The second capture to end will have no title, unless this function is called again.
|
||||
//
|
||||
// Calling this function has no effect if no capture is currently running, and if it is called
|
||||
// multiple times only the last title will be used.
|
||||
typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureTitle)(const char *title);
|
||||
|
||||
// Annotations API:
|
||||
//
|
||||
// These functions allow you to specify annotations either on a per-command level, or a per-object
|
||||
// level.
|
||||
//
|
||||
// Basic types of annotations are supported, as well as vector versions and references to API objects.
|
||||
//
|
||||
// The annotations are stored as keys, with the key being a dot-separated path allowing arbitrary
|
||||
// nesting and user organisation. The keys are sorted in human order so `foo.2.bar` will be displayed
|
||||
// before `foo.10.bar` to allow creation of arrays if desired.
|
||||
//
|
||||
// Deleting an annotation can be done by assigning an empty value to it.
|
||||
|
||||
// the type of an annotation value, or Empty to delete an annotation
|
||||
// Passed as the valueType argument of the SetObjectAnnotation / SetCommandAnnotation function
// pointers. The enumerator names mirror the members of RENDERDOC_AnnotationValue.
typedef enum RENDERDOC_AnnotationType
|
||||
{
|
||||
eRENDERDOC_Empty,
|
||||
eRENDERDOC_Bool,
|
||||
eRENDERDOC_Int32,
|
||||
eRENDERDOC_UInt32,
|
||||
eRENDERDOC_Int64,
|
||||
eRENDERDOC_UInt64,
|
||||
eRENDERDOC_Float,
|
||||
eRENDERDOC_Double,
|
||||
eRENDERDOC_String,
|
||||
eRENDERDOC_APIObject,
|
||||
// NOTE(review): presumably a sentinel that forces a 32-bit underlying type - confirm upstream
eRENDERDOC_AnnotationMax = 0x7FFFFFFF,
|
||||
} RENDERDOC_AnnotationType;
|
||||
|
||||
// a union with vector annotation value data
//
// Every member is a fixed array of four components of one element type. Being a union, all
// members alias the same storage.
typedef union RENDERDOC_AnnotationVectorValue
{
  bool boolean[4];
  int32_t int32[4];
  int64_t int64[4];
  uint32_t uint32[4];
  uint64_t uint64[4];
  float float32[4];
  double float64[4];
} RENDERDOC_AnnotationVectorValue;

// a union with scalar annotation value data
//
// In addition to the scalar members it also carries the vector form, a string pointer and an
// API object pointer, all sharing the same storage.
typedef union RENDERDOC_AnnotationValue
{
  bool boolean;
  int32_t int32;
  int64_t int64;
  uint32_t uint32;
  uint64_t uint64;
  float float32;
  double float64;

  RENDERDOC_AnnotationVectorValue vector;

  const char *string;
  void *apiObject;
} RENDERDOC_AnnotationValue;

// a struct for specifying a GL object, as we don't have pointers we can use so instead we specify a
// pointer to this struct giving both the type and the name
typedef struct RENDERDOC_GLResourceReference
{
  // this is the same GLenum identifier as passed to glObjectLabel
  uint32_t identifier;
  uint32_t name;
} GLResourceReference;

// simple C++ helpers to avoid the need for temporary objects for value passing and GL object
// specification
#ifdef __cplusplus
struct RDGLObjectHelper
{
  RENDERDOC_GLResourceReference gl;

  // stores the identifier/name pair in the embedded reference struct
  RDGLObjectHelper(uint32_t identifier, uint32_t name)
  {
    gl.identifier = identifier;
    gl.name = name;
  }

  // implicit conversion to a pointer to the embedded struct. The pointer refers to this helper's
  // own member, so it is only valid for as long as the helper object itself is alive.
  operator RENDERDOC_GLResourceReference *() { return &gl; }
};

struct RDAnnotationHelper
{
  RENDERDOC_AnnotationValue val;

  // one converting constructor per scalar/string payload; each writes the matching union member
  RDAnnotationHelper(bool b) { val.boolean = b; }
  RDAnnotationHelper(int32_t i) { val.int32 = i; }
  RDAnnotationHelper(int64_t i) { val.int64 = i; }
  RDAnnotationHelper(uint32_t i) { val.uint32 = i; }
  RDAnnotationHelper(uint64_t i) { val.uint64 = i; }
  RDAnnotationHelper(float f) { val.float32 = f; }
  RDAnnotationHelper(double d) { val.float64 = d; }
  RDAnnotationHelper(const char *s) { val.string = s; }

  // implicit conversion to a pointer to the embedded value; valid only while the helper is alive
  operator RENDERDOC_AnnotationValue *() { return &val; }
};
#endif
|
||||
|
||||
// The device is specified in the same way as other API calls that take a RENDERDOC_DevicePointer
|
||||
// to specify the device.
|
||||
//
|
||||
// The object or queue/commandbuffer will depend on the graphics API in question.
|
||||
//
|
||||
// Return value:
|
||||
// 0 - The annotation was applied successfully.
|
||||
// 1 - The device is unknown/invalid
|
||||
// 2 - The device is valid but the annotation is not supported for API-specific reasons, such as an
|
||||
// unrecognised or invalid object or queue/commandbuffer
|
||||
// 3 - The call is ill-formed or invalid e.g. empty is specified with a value pointer, or non-empty
|
||||
// is specified with a NULL value pointer
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_SetObjectAnnotation)(RENDERDOC_DevicePointer device,
|
||||
void *object, const char *key,
|
||||
RENDERDOC_AnnotationType valueType,
|
||||
uint32_t valueVectorWidth,
|
||||
const RENDERDOC_AnnotationValue *value);
|
||||
|
||||
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_SetCommandAnnotation)(
|
||||
RENDERDOC_DevicePointer device, void *queueOrCommandBuffer, const char *key,
|
||||
RENDERDOC_AnnotationType valueType, uint32_t valueVectorWidth,
|
||||
const RENDERDOC_AnnotationValue *value);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// RenderDoc API versions
|
||||
//
|
||||
|
||||
// RenderDoc uses semantic versioning (http://semver.org/).
|
||||
//
|
||||
// MAJOR version is incremented when incompatible API changes happen.
|
||||
// MINOR version is incremented when functionality is added in a backwards-compatible manner.
|
||||
// PATCH version is incremented when backwards-compatible bug fixes happen.
|
||||
//
|
||||
// Note that this means the API returned can be higher than the one you might have requested.
|
||||
// e.g. if you are running against a newer RenderDoc that supports 1.0.1, it will be returned
|
||||
// instead of 1.0.0. You can check this with the GetAPIVersion entry point
|
||||
// Version identifiers accepted by RENDERDOC_GetAPI. Each value is encoded as
// MAJOR * 10000 + MINOR * 100 + PATCH, e.g. 1.4.2 -> 10402.
typedef enum RENDERDOC_Version
|
||||
{
|
||||
eRENDERDOC_API_Version_1_0_0 = 10000, // RENDERDOC_API_1_0_0 = 1 00 00
|
||||
eRENDERDOC_API_Version_1_0_1 = 10001, // RENDERDOC_API_1_0_1 = 1 00 01
|
||||
eRENDERDOC_API_Version_1_0_2 = 10002, // RENDERDOC_API_1_0_2 = 1 00 02
|
||||
eRENDERDOC_API_Version_1_1_0 = 10100, // RENDERDOC_API_1_1_0 = 1 01 00
|
||||
eRENDERDOC_API_Version_1_1_1 = 10101, // RENDERDOC_API_1_1_1 = 1 01 01
|
||||
eRENDERDOC_API_Version_1_1_2 = 10102, // RENDERDOC_API_1_1_2 = 1 01 02
|
||||
eRENDERDOC_API_Version_1_2_0 = 10200, // RENDERDOC_API_1_2_0 = 1 02 00
|
||||
eRENDERDOC_API_Version_1_3_0 = 10300, // RENDERDOC_API_1_3_0 = 1 03 00
|
||||
eRENDERDOC_API_Version_1_4_0 = 10400, // RENDERDOC_API_1_4_0 = 1 04 00
|
||||
eRENDERDOC_API_Version_1_4_1 = 10401, // RENDERDOC_API_1_4_1 = 1 04 01
|
||||
eRENDERDOC_API_Version_1_4_2 = 10402, // RENDERDOC_API_1_4_2 = 1 04 02
|
||||
eRENDERDOC_API_Version_1_5_0 = 10500, // RENDERDOC_API_1_5_0 = 1 05 00
|
||||
eRENDERDOC_API_Version_1_6_0 = 10600, // RENDERDOC_API_1_6_0 = 1 06 00
|
||||
eRENDERDOC_API_Version_1_7_0 = 10700, // RENDERDOC_API_1_7_0 = 1 07 00
|
||||
} RENDERDOC_Version;
|
||||
|
||||
// API version changelog:
|
||||
//
|
||||
// 1.0.0 - initial release
|
||||
// 1.0.1 - Bugfix: IsFrameCapturing() was returning false for captures that were triggered
|
||||
// by keypress or TriggerCapture, instead of Start/EndFrameCapture.
|
||||
// 1.0.2 - Refactor: Renamed eRENDERDOC_Option_DebugDeviceMode to eRENDERDOC_Option_APIValidation
|
||||
// 1.1.0 - Add feature: TriggerMultiFrameCapture(). Backwards compatible with 1.0.x since the new
|
||||
// function pointer is added to the end of the struct, the original layout is identical
|
||||
// 1.1.1 - Refactor: Renamed remote access to target control (to better disambiguate from remote
|
||||
// replay/remote server concept in replay UI)
|
||||
// 1.1.2 - Refactor: Renamed "log file" in function names to just capture, to clarify that these
|
||||
// are captures and not debug logging files. This is the first API version in the v1.0
|
||||
// branch.
|
||||
// 1.2.0 - Added feature: SetCaptureFileComments() to add comments to a capture file that will be
|
||||
// displayed in the UI program on load.
|
||||
// 1.3.0 - Added feature: New capture option eRENDERDOC_Option_AllowUnsupportedVendorExtensions
|
||||
// which allows users to opt-in to allowing unsupported vendor extensions to function.
|
||||
// Should be used at the user's own risk.
|
||||
// Refactor: Renamed eRENDERDOC_Option_VerifyMapWrites to
|
||||
// eRENDERDOC_Option_VerifyBufferAccess, which now also controls initialisation to
|
||||
// 0xdddddddd of uninitialised buffer contents.
|
||||
// 1.4.0 - Added feature: DiscardFrameCapture() to discard a frame capture in progress and stop
|
||||
// capturing without saving anything to disk.
|
||||
// 1.4.1 - Refactor: Renamed Shutdown to RemoveHooks to better clarify what is happening
|
||||
// 1.4.2 - Refactor: Renamed 'draws' to 'actions' in callstack capture option.
|
||||
// 1.5.0 - Added feature: ShowReplayUI() to request that the replay UI show itself if connected
|
||||
// 1.6.0 - Added feature: SetCaptureTitle() which can be used to set a title for a
|
||||
// capture made with StartFrameCapture() or EndFrameCapture()
|
||||
// 1.7.0 - Added feature: SetObjectAnnotation() / SetCommandAnnotation() for adding rich
|
||||
// annotations to objects and command streams
|
||||
|
||||
// Table of API entry points; a pointer to it is handed back through the outAPIPointers
// argument of RENDERDOC_GetAPI. Entry points introduced by later API versions are appended
// at the end of the struct (see the per-member "new function in ..." notes), and renamed
// entry points are kept reachable under both names through anonymous unions.
typedef struct RENDERDOC_API_1_7_0
|
||||
{
|
||||
pRENDERDOC_GetAPIVersion GetAPIVersion;
|
||||
|
||||
pRENDERDOC_SetCaptureOptionU32 SetCaptureOptionU32;
|
||||
pRENDERDOC_SetCaptureOptionF32 SetCaptureOptionF32;
|
||||
|
||||
pRENDERDOC_GetCaptureOptionU32 GetCaptureOptionU32;
|
||||
pRENDERDOC_GetCaptureOptionF32 GetCaptureOptionF32;
|
||||
|
||||
pRENDERDOC_SetFocusToggleKeys SetFocusToggleKeys;
|
||||
pRENDERDOC_SetCaptureKeys SetCaptureKeys;
|
||||
|
||||
pRENDERDOC_GetOverlayBits GetOverlayBits;
|
||||
pRENDERDOC_MaskOverlayBits MaskOverlayBits;
|
||||
|
||||
// Shutdown was renamed to RemoveHooks in 1.4.1.
|
||||
// These unions allow old code to continue compiling without changes
|
||||
union
|
||||
{
|
||||
// deprecated name
pRENDERDOC_Shutdown Shutdown;
|
||||
// current name
pRENDERDOC_RemoveHooks RemoveHooks;
|
||||
};
|
||||
pRENDERDOC_UnloadCrashHandler UnloadCrashHandler;
|
||||
|
||||
// Get/SetLogFilePathTemplate was renamed to Get/SetCaptureFilePathTemplate in 1.1.2.
|
||||
// These unions allow old code to continue compiling without changes
|
||||
union
|
||||
{
|
||||
// deprecated name
|
||||
pRENDERDOC_SetLogFilePathTemplate SetLogFilePathTemplate;
|
||||
// current name
|
||||
pRENDERDOC_SetCaptureFilePathTemplate SetCaptureFilePathTemplate;
|
||||
};
|
||||
union
|
||||
{
|
||||
// deprecated name
|
||||
pRENDERDOC_GetLogFilePathTemplate GetLogFilePathTemplate;
|
||||
// current name
|
||||
pRENDERDOC_GetCaptureFilePathTemplate GetCaptureFilePathTemplate;
|
||||
};
|
||||
|
||||
pRENDERDOC_GetNumCaptures GetNumCaptures;
|
||||
pRENDERDOC_GetCapture GetCapture;
|
||||
|
||||
pRENDERDOC_TriggerCapture TriggerCapture;
|
||||
|
||||
// IsRemoteAccessConnected was renamed to IsTargetControlConnected in 1.1.1.
|
||||
// This union allows old code to continue compiling without changes
|
||||
union
|
||||
{
|
||||
// deprecated name
|
||||
pRENDERDOC_IsRemoteAccessConnected IsRemoteAccessConnected;
|
||||
// current name
|
||||
pRENDERDOC_IsTargetControlConnected IsTargetControlConnected;
|
||||
};
|
||||
pRENDERDOC_LaunchReplayUI LaunchReplayUI;
|
||||
|
||||
pRENDERDOC_SetActiveWindow SetActiveWindow;
|
||||
|
||||
pRENDERDOC_StartFrameCapture StartFrameCapture;
|
||||
pRENDERDOC_IsFrameCapturing IsFrameCapturing;
|
||||
pRENDERDOC_EndFrameCapture EndFrameCapture;
|
||||
|
||||
// new function in 1.1.0
|
||||
pRENDERDOC_TriggerMultiFrameCapture TriggerMultiFrameCapture;
|
||||
|
||||
// new function in 1.2.0
|
||||
pRENDERDOC_SetCaptureFileComments SetCaptureFileComments;
|
||||
|
||||
// new function in 1.4.0
|
||||
pRENDERDOC_DiscardFrameCapture DiscardFrameCapture;
|
||||
|
||||
// new function in 1.5.0
|
||||
pRENDERDOC_ShowReplayUI ShowReplayUI;
|
||||
|
||||
// new function in 1.6.0
|
||||
pRENDERDOC_SetCaptureTitle SetCaptureTitle;
|
||||
|
||||
// new functions in 1.7.0
|
||||
pRENDERDOC_SetObjectAnnotation SetObjectAnnotation;
|
||||
pRENDERDOC_SetCommandAnnotation SetCommandAnnotation;
|
||||
} RENDERDOC_API_1_7_0;
|
||||
|
||||
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_0_0;
|
||||
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_0_1;
|
||||
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_0_2;
|
||||
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_1_0;
|
||||
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_1_1;
|
||||
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_1_2;
|
||||
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_2_0;
|
||||
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_3_0;
|
||||
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_4_0;
|
||||
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_4_1;
|
||||
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_4_2;
|
||||
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_5_0;
|
||||
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_6_0;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// RenderDoc API entry point
|
||||
//
|
||||
// This entry point can be obtained via GetProcAddress/dlsym if RenderDoc is available.
|
||||
//
|
||||
// The name is the same as the typedef - "RENDERDOC_GetAPI"
|
||||
//
|
||||
// This function is not thread safe, and should not be called on multiple threads at once.
|
||||
// Ideally, call this once as early as possible in your application's startup, before doing
|
||||
// any API work, since some configuration functionality etc has to be done also before
|
||||
// initialising any APIs.
|
||||
//
|
||||
// Parameters:
|
||||
// version is a single value from the RENDERDOC_Version above.
|
||||
//
|
||||
// outAPIPointers will be filled out with a pointer to the corresponding struct of function
|
||||
// pointers.
|
||||
//
|
||||
// Returns:
|
||||
// 1 - if the outAPIPointers has been filled with a pointer to the API struct requested
|
||||
// 0 - if the requested version is not supported or the arguments are invalid.
|
||||
//
|
||||
typedef int(RENDERDOC_CC *pRENDERDOC_GetAPI)(RENDERDOC_Version version, void **outAPIPointers);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
|
@ -23,7 +23,12 @@ target_include_directories(lsfg-vk-common
|
|||
target_include_directories(lsfg-vk-common SYSTEM
|
||||
PRIVATE thirdparty/include)
|
||||
|
||||
if(LSFGVK_INSTALL_DEVELOP)
|
||||
target_compile_options(lsfg-vk-common PUBLIC
|
||||
-Wno-cast-function-type-strict # Vulkan function pointers
|
||||
-Wno-shadow # Shadowing variables used to be common practice
|
||||
)
|
||||
|
||||
if(LSFGVK_INSTALL_LIBRARIES)
|
||||
install(TARGETS lsfg-vk-common
|
||||
ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")
|
||||
install(DIRECTORY "include/lsfg-vk-common/"
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
namespace ls {
|
||||
|
||||
/// find the location of the Lossless.dll
|
||||
/// find the location of the lsfg-vk.dll
|
||||
/// @returns the path to the DLL
|
||||
/// @throws ls::error if the DLL could not be found
|
||||
std::filesystem::path findShaderDll();
|
||||
|
|
|
|||
|
|
@ -21,7 +21,12 @@ namespace vk {
|
|||
template<typename T>
|
||||
Buffer(const vk::Vulkan& vk, const T& data,
|
||||
VkBufferUsageFlags usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)
|
||||
: Buffer(vk, reinterpret_cast<const void*>(&data), sizeof(T), usage) {}
|
||||
: Buffer(
|
||||
vk,
|
||||
reinterpret_cast<const void*>(&data), // NOLINT (unsafe cast)
|
||||
sizeof(T),
|
||||
usage
|
||||
) {}
|
||||
|
||||
/// create a buffer
|
||||
/// @param vk the vulkan instance
|
||||
|
|
|
|||
|
|
@ -42,7 +42,8 @@ namespace vk {
|
|||
void blitImage(const vk::Vulkan& vk,
|
||||
const std::vector<vk::Barrier>& preBarriers,
|
||||
std::pair<VkImage, VkImage> images, VkExtent2D extent,
|
||||
const std::vector<vk::Barrier>& postBarriers) const;
|
||||
const std::vector<vk::Barrier>& postBarriers,
|
||||
uint32_t srcLayer = 0, uint32_t dstLayer = 0) const;
|
||||
|
||||
/// insert a bunch of barriers
|
||||
/// @param vk the vulkan instance
|
||||
|
|
@ -68,7 +69,8 @@ namespace vk {
|
|||
/// @param buffer the source buffer
|
||||
/// @param image the destination image
|
||||
void copyBufferToImage(const vk::Vulkan& vk,
|
||||
const vk::Buffer& buffer, const vk::Image& image) const;
|
||||
const vk::Buffer& buffer, const vk::Image& image,
|
||||
uint32_t dstLayer = 0) const;
|
||||
|
||||
/// end recording commands
|
||||
/// @param vk the vulkan instance
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
#include "../helpers/pointers.hpp"
|
||||
#include "vulkan.hpp"
|
||||
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
|
@ -26,7 +27,9 @@ namespace vk {
|
|||
VkFormat format = VK_FORMAT_R8G8B8A8_UNORM,
|
||||
VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
||||
std::optional<int> importFd = std::nullopt,
|
||||
std::optional<int*> exportFd = std::nullopt);
|
||||
std::optional<int*> exportFd = std::nullopt,
|
||||
uint32_t arrayLayers = 1
|
||||
);
|
||||
|
||||
/// get the image handle
|
||||
/// @return the image handle
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ void ConfigFile::createDefaultConfigFile(const std::filesystem::path& path) {
|
|||
ofs << R"(version = 2
|
||||
|
||||
[global]
|
||||
# dll = '/media/games/Lossless Scaling/Lossless.dll' # if you don't have LS in the default location
|
||||
# dll = '/media/games/Lossless Scaling/lsfg-vk.dll' # if you don't have LS in the default location
|
||||
allow_fp16 = true # this will improve give a MASSIVE performance boost on AMD, but be super slow on older (!) NVIDIA GPUs
|
||||
|
||||
[[profile]]
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ std::filesystem::path ls::findShaderDll() {
|
|||
auto base = std::filesystem::path(xdgPath);
|
||||
|
||||
for (const auto& frag : FRAGMENTS) {
|
||||
auto full = base / frag / "Lossless Scaling" / "Lossless.dll";
|
||||
auto full = base / frag / "Lossless Scaling" / "lsfg-vk.dll";
|
||||
if (std::filesystem::exists(full))
|
||||
return full;
|
||||
}
|
||||
|
|
@ -34,16 +34,16 @@ std::filesystem::path ls::findShaderDll() {
|
|||
auto base = std::filesystem::path(homePath);
|
||||
|
||||
for (const auto& frag : FRAGMENTS) {
|
||||
auto full = base / frag / "Lossless Scaling" / "Lossless.dll";
|
||||
auto full = base / frag / "Lossless Scaling" / "lsfg-vk.dll";
|
||||
if (std::filesystem::exists(full))
|
||||
return full;
|
||||
}
|
||||
}
|
||||
|
||||
// fallback to same directory
|
||||
auto local = std::filesystem::current_path() / "Lossless.dll";
|
||||
auto local = std::filesystem::current_path() / "lsfg-vk.dll";
|
||||
if (std::filesystem::exists(local))
|
||||
return local;
|
||||
|
||||
throw ls::error("unable to locate Lossless.dll, please set the path in the configuration");
|
||||
throw ls::error("unable to locate lsfg-vk.dll, please set the path in the configuration");
|
||||
}
|
||||
|
|
|
|||
|
|
@ -105,7 +105,8 @@ void CommandBuffer::dispatch(const vk::Vulkan& vk,
|
|||
void CommandBuffer::blitImage(const vk::Vulkan& vk,
|
||||
const std::vector<vk::Barrier>& preBarriers,
|
||||
std::pair<VkImage, VkImage> images, VkExtent2D extent,
|
||||
const std::vector<vk::Barrier>& postBarriers) const {
|
||||
const std::vector<vk::Barrier>& postBarriers,
|
||||
uint32_t srcLayer, uint32_t dstLayer) const {
|
||||
vk.df().CmdPipelineBarrier(*this->commandBuffer,
|
||||
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
|
||||
0,
|
||||
|
|
@ -117,7 +118,8 @@ void CommandBuffer::blitImage(const vk::Vulkan& vk,
|
|||
const VkImageBlit region{
|
||||
.srcSubresource = {
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.layerCount = 1
|
||||
.baseArrayLayer = srcLayer,
|
||||
.layerCount = 1,
|
||||
},
|
||||
.srcOffsets = {
|
||||
{ 0, 0, 0 },
|
||||
|
|
@ -126,6 +128,7 @@ void CommandBuffer::blitImage(const vk::Vulkan& vk,
|
|||
},
|
||||
.dstSubresource = {
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.baseArrayLayer = dstLayer,
|
||||
.layerCount = 1
|
||||
},
|
||||
.dstOffsets = {
|
||||
|
|
@ -151,7 +154,8 @@ void CommandBuffer::blitImage(const vk::Vulkan& vk,
|
|||
}
|
||||
|
||||
void CommandBuffer::copyBufferToImage(const vk::Vulkan& vk,
|
||||
const vk::Buffer& buffer, const vk::Image& image) const {
|
||||
const vk::Buffer& buffer, const vk::Image& image,
|
||||
uint32_t dstLayer) const {
|
||||
const VkImageMemoryBarrier barrier{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
|
||||
.srcAccessMask = VK_ACCESS_NONE,
|
||||
|
|
@ -179,6 +183,7 @@ void CommandBuffer::copyBufferToImage(const vk::Vulkan& vk,
|
|||
.bufferImageHeight = 0,
|
||||
.imageSubresource = {
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.baseArrayLayer = dstLayer,
|
||||
.layerCount = 1
|
||||
},
|
||||
.imageExtent = {
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <bitset>
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
|
@ -16,7 +17,7 @@ namespace {
|
|||
/// create an image
|
||||
ls::owned_ptr<VkImage> createImage(const vk::Vulkan& vk,
|
||||
VkExtent2D extent, VkFormat format, VkImageUsageFlags usage,
|
||||
bool external) {
|
||||
bool external, uint32_t arrayLayers) {
|
||||
VkImage handle{};
|
||||
|
||||
const VkExternalMemoryImageCreateInfo externalInfo{
|
||||
|
|
@ -34,7 +35,7 @@ namespace {
|
|||
.depth = 1
|
||||
},
|
||||
.mipLevels = 1,
|
||||
.arrayLayers = 1,
|
||||
.arrayLayers = arrayLayers,
|
||||
.samples = VK_SAMPLE_COUNT_1_BIT,
|
||||
.usage = usage,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE
|
||||
|
|
@ -121,20 +122,20 @@ namespace {
|
|||
}
|
||||
/// create an image view
|
||||
ls::owned_ptr<VkImageView> createImageView(const vk::Vulkan& vk,
|
||||
VkImage image, VkFormat format) {
|
||||
VkImage image, VkFormat format, uint32_t arrayLayers) {
|
||||
VkImageView handle{};
|
||||
|
||||
const VkImageViewCreateInfo viewInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
|
||||
.image = image,
|
||||
.viewType = VK_IMAGE_VIEW_TYPE_2D,
|
||||
.viewType = arrayLayers == 1 ? VK_IMAGE_VIEW_TYPE_2D : VK_IMAGE_VIEW_TYPE_2D_ARRAY,
|
||||
.format = format,
|
||||
.subresourceRange = {
|
||||
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
|
||||
.baseMipLevel = 0,
|
||||
.levelCount = 1,
|
||||
.baseArrayLayer = 0,
|
||||
.layerCount = 1
|
||||
.layerCount = arrayLayers
|
||||
}
|
||||
};
|
||||
auto res = vk.df().CreateImageView(vk.dev(), &viewInfo, VK_NULL_HANDLE, &handle);
|
||||
|
|
@ -155,10 +156,13 @@ Image::Image(const vk::Vulkan& vk,
|
|||
VkFormat format,
|
||||
VkImageUsageFlags usage,
|
||||
std::optional<int> importFd,
|
||||
std::optional<int*> exportFd) :
|
||||
std::optional<int*> exportFd,
|
||||
uint32_t arrayLayers
|
||||
) :
|
||||
image(createImage(vk,
|
||||
extent, format, usage,
|
||||
importFd.has_value() || exportFd.has_value()
|
||||
importFd.has_value() || exportFd.has_value(),
|
||||
arrayLayers
|
||||
)),
|
||||
memory(allocateMemory(vk,
|
||||
*this->image,
|
||||
|
|
@ -166,7 +170,8 @@ Image::Image(const vk::Vulkan& vk,
|
|||
)),
|
||||
view(createImageView(vk,
|
||||
*this->image,
|
||||
format
|
||||
format,
|
||||
arrayLayers
|
||||
)),
|
||||
extent(extent) {
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,4 +29,3 @@ Checks:
|
|||
# Vulkan layers often require C-style memory access
|
||||
- -cppcoreguidelines-pro-bounds-pointer-arithmetic
|
||||
- -cppcoreguidelines-pro-type-union-access
|
||||
- -clang-diagnostic-unsafe-buffer-usage
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ target_link_libraries(lsfg-vk-layer
|
|||
PUBLIC lsfg-vk-backend)
|
||||
|
||||
target_compile_options(lsfg-vk-layer PRIVATE
|
||||
-Wno-unknown-warning-option
|
||||
-Wno-unsafe-buffer-usage) # Array indexing
|
||||
|
||||
set_target_properties(lsfg-vk-layer PROPERTIES
|
||||
|
|
|
|||
|
|
@ -22,17 +22,17 @@
|
|||
using namespace lsfgvk::layer;
|
||||
|
||||
namespace {
|
||||
// global layer info initialized at layer negotiation
|
||||
/// Global layer info initialized at layer negotiation
|
||||
struct LayerInfo {
|
||||
std::unordered_map<std::string, PFN_vkVoidFunction> map; //!< function pointer override map
|
||||
std::unordered_map<std::string, PFN_vkVoidFunction> map; //!< Function pointer override map
|
||||
PFN_vkGetInstanceProcAddr GetInstanceProcAddr;
|
||||
|
||||
Root root;
|
||||
}* layer_info; // NOLINT (global variable)
|
||||
|
||||
// instance-wide info initialized at instance creation(s)
|
||||
/// Instance-wide info initialized at instance creation(s)
|
||||
struct InstanceInfo {
|
||||
std::vector<VkInstance> handles; // there may be several instances
|
||||
std::vector<VkInstance> handles; // There may be several instances
|
||||
vk::VulkanInstanceFuncs funcs;
|
||||
|
||||
std::unordered_map<VkDevice, vk::Vulkan> devices;
|
||||
|
|
@ -40,44 +40,44 @@ namespace {
|
|||
std::unordered_map<VkSwapchainKHR, SwapchainInfo> swapchainInfos;
|
||||
}* instance_info; // NOLINT (global variable)
|
||||
|
||||
// create instance
|
||||
/// Create instance
|
||||
VkResult myvkCreateInstance(
|
||||
const VkInstanceCreateInfo* info,
|
||||
const VkAllocationCallbacks* alloc,
|
||||
VkInstance* instance) {
|
||||
// apply layer chaining
|
||||
// Apply layer chaining
|
||||
auto* layerInfo = reinterpret_cast<VkLayerInstanceCreateInfo*>(const_cast<void*>(info->pNext));
|
||||
while (layerInfo && (layerInfo->sType != VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO
|
||||
|| layerInfo->function != VK_LAYER_LINK_INFO)) {
|
||||
layerInfo = reinterpret_cast<VkLayerInstanceCreateInfo*>(const_cast<void*>(layerInfo->pNext));
|
||||
}
|
||||
if (!layerInfo) {
|
||||
std::cerr << "lsfg-vk: no layer info found in pNext chain, "
|
||||
std::cerr << "lsfg-vk: No layer info found in pNext chain, "
|
||||
"the previous layer does not follow spec\n";
|
||||
return VK_ERROR_INITIALIZATION_FAILED;
|
||||
}
|
||||
|
||||
auto* linkInfo = layerInfo->u.pLayerInfo;
|
||||
if (!linkInfo) {
|
||||
std::cerr << "lsfg-vk: link info is null, "
|
||||
std::cerr << "lsfg-vk: Link info is null, "
|
||||
"the previous layer does not follow spec\n";
|
||||
return VK_ERROR_INITIALIZATION_FAILED;
|
||||
}
|
||||
|
||||
layer_info->GetInstanceProcAddr = linkInfo->pfnNextGetInstanceProcAddr;
|
||||
if (!layer_info->GetInstanceProcAddr) {
|
||||
std::cerr << "lsfg-vk: next layer's vkGetInstanceProcAddr is null, "
|
||||
std::cerr << "lsfg-vk: Next layer's vkGetInstanceProcAddr is null, "
|
||||
"the previous layer does not follow spec\n";
|
||||
return VK_ERROR_INITIALIZATION_FAILED;
|
||||
}
|
||||
|
||||
layerInfo->u.pLayerInfo = linkInfo->pNext; // advance for next layer
|
||||
layerInfo->u.pLayerInfo = linkInfo->pNext; // Advance for next layer
|
||||
|
||||
// create instance
|
||||
// Create instance
|
||||
auto* vkCreateInstance = reinterpret_cast<PFN_vkCreateInstance>(
|
||||
layer_info->GetInstanceProcAddr(VK_NULL_HANDLE, "vkCreateInstance"));
|
||||
if (!vkCreateInstance) {
|
||||
std::cerr << "lsfg-vk: failed to get next layer's vkCreateInstance, "
|
||||
std::cerr << "lsfg-vk: Failed to get next layer's vkCreateInstance, "
|
||||
"the previous layer does not follow spec\n";
|
||||
return VK_ERROR_INITIALIZATION_FAILED;
|
||||
}
|
||||
|
|
@ -103,64 +103,64 @@ namespace {
|
|||
return VK_SUCCESS;
|
||||
} catch (const ls::vulkan_error& e) {
|
||||
if (e.error() == VK_ERROR_EXTENSION_NOT_PRESENT)
|
||||
std::cerr << "lsfg-vk: required Vulkan instance extensions are not present. "
|
||||
std::cerr << "lsfg-vk: Required Vulkan instance extensions are not present. "
|
||||
"Your GPU driver is not supported.\n";
|
||||
return e.error();
|
||||
}
|
||||
}
|
||||
|
||||
// create device
|
||||
/// Create device
|
||||
VkResult myvkCreateDevice(
|
||||
VkPhysicalDevice physdev,
|
||||
const VkDeviceCreateInfo* info,
|
||||
const VkAllocationCallbacks* alloc,
|
||||
VkDevice* device) {
|
||||
// apply layer chaining
|
||||
// Apply layer chaining
|
||||
auto* layerInfo = reinterpret_cast<VkLayerDeviceCreateInfo*>(const_cast<void*>(info->pNext));
|
||||
while (layerInfo && (layerInfo->sType != VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO
|
||||
|| layerInfo->function != VK_LAYER_LINK_INFO)) {
|
||||
layerInfo = reinterpret_cast<VkLayerDeviceCreateInfo*>(const_cast<void*>(layerInfo->pNext));
|
||||
}
|
||||
if (!layerInfo) {
|
||||
std::cerr << "lsfg-vk: no layer info found in pNext chain, "
|
||||
std::cerr << "lsfg-vk: No layer info found in pNext chain, "
|
||||
"the previous layer does not follow spec\n";
|
||||
return VK_ERROR_INITIALIZATION_FAILED;
|
||||
}
|
||||
|
||||
auto* linkInfo = layerInfo->u.pLayerInfo;
|
||||
if (!linkInfo) {
|
||||
std::cerr << "lsfg-vk: link info is null, "
|
||||
std::cerr << "lsfg-vk: Link info is null, "
|
||||
"the previous layer does not follow spec\n";
|
||||
return VK_ERROR_INITIALIZATION_FAILED;
|
||||
}
|
||||
|
||||
instance_info->funcs.GetDeviceProcAddr = linkInfo->pfnNextGetDeviceProcAddr;
|
||||
if (!linkInfo->pfnNextGetDeviceProcAddr) {
|
||||
std::cerr << "lsfg-vk: next layer's vkGetDeviceProcAddr is null, "
|
||||
std::cerr << "lsfg-vk: Next layer's vkGetDeviceProcAddr is null, "
|
||||
"the previous layer does not follow spec\n";
|
||||
return VK_ERROR_INITIALIZATION_FAILED;
|
||||
}
|
||||
|
||||
layerInfo->u.pLayerInfo = linkInfo->pNext; // advance for next layer
|
||||
layerInfo->u.pLayerInfo = linkInfo->pNext; // Advance for next layer
|
||||
|
||||
// fetch device loader functions
|
||||
// Fetch device loader functions
|
||||
layerInfo = reinterpret_cast<VkLayerDeviceCreateInfo*>(const_cast<void*>(info->pNext));
|
||||
while (layerInfo && (layerInfo->sType != VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO
|
||||
|| layerInfo->function != VK_LOADER_DATA_CALLBACK)) {
|
||||
layerInfo = reinterpret_cast<VkLayerDeviceCreateInfo*>(const_cast<void*>(layerInfo->pNext));
|
||||
}
|
||||
if (!layerInfo) {
|
||||
std::cerr << "lsfg-vk: no layer loader data found in pNext chain.\n";
|
||||
std::cerr << "lsfg-vk: No layer loader data found in pNext chain.\n";
|
||||
return VK_ERROR_INITIALIZATION_FAILED;
|
||||
}
|
||||
|
||||
auto* setLoaderData = layerInfo->u.pfnSetDeviceLoaderData;
|
||||
if (!setLoaderData) {
|
||||
std::cerr << "lsfg-vk: instance loader data function is null.\n";
|
||||
std::cerr << "lsfg-vk: Instance loader data function is null.\n";
|
||||
return VK_ERROR_INITIALIZATION_FAILED;
|
||||
}
|
||||
|
||||
// create device
|
||||
// Create device
|
||||
try {
|
||||
VkDeviceCreateInfo newInfo = *info;
|
||||
layer_info->root.modifyDeviceCreateInfo(newInfo,
|
||||
|
|
@ -172,12 +172,12 @@ namespace {
|
|||
);
|
||||
} catch (const ls::vulkan_error& e) {
|
||||
if (e.error() == VK_ERROR_EXTENSION_NOT_PRESENT)
|
||||
std::cerr << "lsfg-vk: required Vulkan device extensions are not present. "
|
||||
std::cerr << "lsfg-vk: Required Vulkan device extensions are not present. "
|
||||
"Your GPU driver is not supported.\n";
|
||||
return e.error();
|
||||
}
|
||||
|
||||
// create layer instance
|
||||
// Create layer instance
|
||||
try {
|
||||
instance_info->devices.emplace(
|
||||
*device,
|
||||
|
|
@ -189,25 +189,25 @@ namespace {
|
|||
)
|
||||
);
|
||||
} catch (const std::exception& e) {
|
||||
std::cerr << "lsfg-vk: something went wrong during lsfg-vk initialization:\n";
|
||||
std::cerr << "lsfg-vk: Something went wrong during lsfg-vk initialization:\n";
|
||||
std::cerr << "- " << e.what() << '\n';
|
||||
}
|
||||
|
||||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
// destroy device
|
||||
/// Destroy device
|
||||
void myvkDestroyDevice(VkDevice device, const VkAllocationCallbacks* alloc) {
|
||||
// destroy layer instance
|
||||
// Destroy layer instance
|
||||
auto it = instance_info->devices.find(device);
|
||||
if (it != instance_info->devices.end())
|
||||
instance_info->devices.erase(it);
|
||||
|
||||
// destroy device
|
||||
// Destroy device
|
||||
auto vkDestroyDevice = reinterpret_cast<PFN_vkDestroyDevice>(
|
||||
instance_info->funcs.GetDeviceProcAddr(device, "vkDestroyDevice"));
|
||||
if (!vkDestroyDevice) {
|
||||
std::cerr << "lsfg-vk: failed to get next layer's vkDestroyDevice, "
|
||||
std::cerr << "lsfg-vk: Failed to get next layer's vkDestroyDevice, "
|
||||
"the previous layer does not follow spec\n";
|
||||
return;
|
||||
}
|
||||
|
|
@ -215,24 +215,24 @@ namespace {
|
|||
vkDestroyDevice(device, alloc);
|
||||
}
|
||||
|
||||
// destroy instance
|
||||
/// Destroy instance
|
||||
void myvkDestroyInstance(VkInstance instance, const VkAllocationCallbacks* alloc) {
|
||||
// remove instance handle
|
||||
// Remove instance handle
|
||||
auto it = std::ranges::find(instance_info->handles, instance);
|
||||
if (it != instance_info->handles.end())
|
||||
instance_info->handles.erase(it);
|
||||
|
||||
// destroy instance info if no handles remain
|
||||
// Destroy instance info if no handles remain
|
||||
if (instance_info->handles.empty()) {
|
||||
delete instance_info; // NOLINT (memory management)
|
||||
instance_info = nullptr;
|
||||
}
|
||||
|
||||
// destroy instance
|
||||
// Destroy instance
|
||||
auto vkDestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>(
|
||||
layer_info->GetInstanceProcAddr(instance, "vkDestroyInstance"));
|
||||
if (!vkDestroyInstance) {
|
||||
std::cerr << "lsfg-vk: failed to get next layer's vkDestroyInstance, "
|
||||
std::cerr << "lsfg-vk: Failed to get next layer's vkDestroyInstance, "
|
||||
"the previous layer does not follow spec\n";
|
||||
return;
|
||||
}
|
||||
|
|
@ -240,7 +240,7 @@ namespace {
|
|||
vkDestroyInstance(instance, alloc);
|
||||
}
|
||||
|
||||
// get optional function pointer override
|
||||
/// Get optional function pointer override
|
||||
PFN_vkVoidFunction getProcAddr(const std::string& name) {
|
||||
auto it = layer_info->map.find(name);
|
||||
if (it != layer_info->map.end())
|
||||
|
|
@ -248,7 +248,7 @@ namespace {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
// get instance-level function pointers
|
||||
/// Get instance-level function pointers
|
||||
PFN_vkVoidFunction myvkGetInstanceProcAddr(VkInstance instance, const char* name) {
|
||||
if (!name) return nullptr;
|
||||
|
||||
|
|
@ -259,7 +259,7 @@ namespace {
|
|||
return layer_info->GetInstanceProcAddr(instance, name);
|
||||
}
|
||||
|
||||
// get device-level function pointers
|
||||
/// Get device-level function pointers
|
||||
PFN_vkVoidFunction myvkGetDeviceProcAddr(VkDevice device, const char* name) {
|
||||
if (!name) return nullptr;
|
||||
|
||||
|
|
@ -282,7 +282,7 @@ namespace {
|
|||
return VK_ERROR_INITIALIZATION_FAILED;
|
||||
|
||||
try {
|
||||
// retire old swapchain
|
||||
// Retire old swapchain
|
||||
if (info->oldSwapchain) {
|
||||
const auto& info_mapping = instance_info->swapchainInfos.find(info->oldSwapchain);
|
||||
if (info_mapping != instance_info->swapchainInfos.end())
|
||||
|
|
@ -295,9 +295,9 @@ namespace {
|
|||
layer_info->root.removeSwapchainContext(info->oldSwapchain);
|
||||
}
|
||||
|
||||
layer_info->root.update(); // ensure config is up to date
|
||||
layer_info->root.update(); // Ensure config is up to date
|
||||
|
||||
// create swapchain
|
||||
// Create swapchain
|
||||
VkSwapchainCreateInfoKHR newInfo = *info;
|
||||
layer_info->root.modifySwapchainCreateInfo(it->second, newInfo,
|
||||
[=, newInfo = &newInfo]() {
|
||||
|
|
@ -308,7 +308,7 @@ namespace {
|
|||
}
|
||||
);
|
||||
|
||||
// get all swapchain images
|
||||
// Get all swapchain images
|
||||
uint32_t imageCount{};
|
||||
auto res = it->second.df().GetSwapchainImagesKHR(device, *swapchain,
|
||||
&imageCount, VK_NULL_HANDLE);
|
||||
|
|
@ -329,7 +329,7 @@ namespace {
|
|||
.presentMode = newInfo.presentMode
|
||||
}).first->second;
|
||||
|
||||
// create lsfg-vk swapchain
|
||||
// Create lsfg-vk swapchain
|
||||
layer_info->root.createSwapchainContext(it->second, *swapchain, info);
|
||||
|
||||
instance_info->swapchains.emplace(*swapchain,
|
||||
|
|
@ -337,11 +337,11 @@ namespace {
|
|||
|
||||
return res;
|
||||
} catch (const ls::vulkan_error& e) {
|
||||
std::cerr << "lsfg-vk: something went wrong during lsfg-vk swapchain creation:\n";
|
||||
std::cerr << "lsfg-vk: Something went wrong during lsfg-vk swapchain creation:\n";
|
||||
std::cerr << "- " << e.what() << '\n';
|
||||
return e.error();
|
||||
} catch (const std::exception& e) {
|
||||
std::cerr << "lsfg-vk: something went wrong during lsfg-vk swapchain creation:\n";
|
||||
std::cerr << "lsfg-vk: Something went wrong during lsfg-vk swapchain creation:\n";
|
||||
std::cerr << "- " << e.what() << '\n';
|
||||
return VK_ERROR_INITIALIZATION_FAILED;
|
||||
}
|
||||
|
|
@ -349,16 +349,15 @@ namespace {
|
|||
|
||||
VkResult myvkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* info) {
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wunknown-warning-option"
|
||||
#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
|
||||
VkResult result = VK_SUCCESS;
|
||||
|
||||
// ensure layer config is up to date
|
||||
// Ensure layer config is up to date
|
||||
bool reload{};
|
||||
try {
|
||||
reload = layer_info->root.update();
|
||||
} catch (const std::exception&) {
|
||||
reload = false; // ignore parse errors
|
||||
reload = false; // Ignore parse errors
|
||||
}
|
||||
|
||||
if (reload) {
|
||||
|
|
@ -377,7 +376,7 @@ namespace {
|
|||
}
|
||||
}
|
||||
|
||||
// present each swapchain
|
||||
// Present each swapchain
|
||||
for (size_t i = 0; i < info->swapchainCount; i++) {
|
||||
const auto& swapchain = info->pSwapchains[i];
|
||||
|
||||
|
|
@ -403,7 +402,7 @@ namespace {
|
|||
if (e.error() != VK_ERROR_OUT_OF_DATE_KHR) {
|
||||
std::cerr << "lsfg-vk: something went wrong during lsfg-vk swapchain presentation:\n";
|
||||
std::cerr << "- " << e.what() << '\n';
|
||||
} // silently swallow out-of-date errors
|
||||
} // Silently swallow out-of-date errors
|
||||
|
||||
result = e.error();
|
||||
} catch (const std::exception& e) {
|
||||
|
|
@ -438,7 +437,7 @@ namespace {
|
|||
|
||||
layer_info->root.removeSwapchainContext(swapchain);
|
||||
|
||||
// destroy swapchain
|
||||
// Destroy swapchain
|
||||
it->second.df().DestroySwapchainKHR(device, swapchain, alloc);
|
||||
}
|
||||
}
|
||||
|
|
@ -446,13 +445,13 @@ namespace {
|
|||
/// Vulkan layer entrypoint
|
||||
__attribute__((visibility("default")))
|
||||
VkResult vkNegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface* pVersionStruct) {
|
||||
// ensure loader compatibility
|
||||
// Ensure loader compatibility
|
||||
if (!pVersionStruct
|
||||
|| pVersionStruct->sType != LAYER_NEGOTIATE_INTERFACE_STRUCT
|
||||
|| pVersionStruct->loaderLayerInterfaceVersion < 2)
|
||||
return VK_ERROR_INITIALIZATION_FAILED;
|
||||
|
||||
// if the layer has already been initialized, skip
|
||||
// If the layer has already been initialized, skip
|
||||
if (layer_info) {
|
||||
pVersionStruct->loaderLayerInterfaceVersion = 2;
|
||||
pVersionStruct->pfnGetPhysicalDeviceProcAddr = nullptr;
|
||||
|
|
@ -461,7 +460,7 @@ VkResult vkNegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface* pVers
|
|||
return VK_SUCCESS;
|
||||
}
|
||||
|
||||
// load the layer configuration
|
||||
// Load the layer configuration
|
||||
try {
|
||||
layer_info = new LayerInfo { // NOLINT (memory management)
|
||||
.map = {
|
||||
|
|
@ -478,20 +477,20 @@ VkResult vkNegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface* pVers
|
|||
.root = Root()
|
||||
};
|
||||
|
||||
if (!layer_info->root.active()) { // skip inactive
|
||||
if (!layer_info->root.active()) { // Skip inactive
|
||||
delete layer_info; // NOLINT (memory management)
|
||||
layer_info = nullptr;
|
||||
|
||||
return VK_ERROR_INITIALIZATION_FAILED;
|
||||
}
|
||||
} catch (const std::exception& e) {
|
||||
std::cerr << "lsfg-vk: something went wrong during lsfg-vk layer initialization:\n";
|
||||
std::cerr << "lsfg-vk: Something went wrong during lsfg-vk layer initialization:\n";
|
||||
std::cerr << "- " << e.what() << '\n';
|
||||
|
||||
return VK_ERROR_INITIALIZATION_FAILED;
|
||||
}
|
||||
|
||||
// emplace function pointers/version
|
||||
// Emplace function pointers/version
|
||||
pVersionStruct->loaderLayerInterfaceVersion = 2;
|
||||
pVersionStruct->pfnGetPhysicalDeviceProcAddr = nullptr;
|
||||
pVersionStruct->pfnGetDeviceProcAddr = myvkGetDeviceProcAddr;
|
||||
|
|
|
|||
|
|
@ -2,10 +2,10 @@
|
|||
|
||||
#include "instance.hpp"
|
||||
#include "lsfg-vk-common/helpers/paths.hpp"
|
||||
#include "swapchain.hpp"
|
||||
#include "lsfg-vk-common/configuration/detection.hpp"
|
||||
#include "lsfg-vk-common/helpers/errors.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
#include "swapchain.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
|
|
@ -25,7 +25,7 @@ using namespace lsfgvk;
|
|||
using namespace lsfgvk::layer;
|
||||
|
||||
namespace {
|
||||
/// helper function to add required extensions
|
||||
/// Helper function to add required extensions
|
||||
std::vector<const char*> add_extensions(const char* const* existingExtensions, size_t count,
|
||||
const std::vector<const char*>& requiredExtensions) {
|
||||
std::vector<const char*> extensions(count);
|
||||
|
|
@ -45,14 +45,14 @@ namespace {
|
|||
}
|
||||
|
||||
Root::Root() {
|
||||
// find active profile
|
||||
// Find active profile
|
||||
const auto& profile = findProfile(this->config.get(), ls::identify());
|
||||
if (!profile.has_value())
|
||||
return;
|
||||
|
||||
this->active_profile = profile->second;
|
||||
|
||||
std::cerr << "lsfg-vk: using profile with name '" << this->active_profile->name << "' ";
|
||||
std::cerr << "lsfg-vk: Using profile with name '" << this->active_profile->name << "' ";
|
||||
switch (profile->first) {
|
||||
case ls::IdentType::OVERRIDE:
|
||||
std::cerr << "(identified via override)\n";
|
||||
|
|
@ -167,10 +167,10 @@ void Root::modifySwapchainCreateInfo(const vk::Vulkan& vk, VkSwapchainCreateInfo
|
|||
void Root::createSwapchainContext(const vk::Vulkan& vk,
|
||||
VkSwapchainKHR swapchain, const SwapchainInfo& info) {
|
||||
if (!this->active_profile.has_value())
|
||||
throw ls::error("attempted to create swapchain context while layer is inactive");
|
||||
throw ls::error("Attempted to create swapchain context while layer is inactive");
|
||||
const auto& profile = *this->active_profile;
|
||||
|
||||
if (!this->backend.has_value()) { // emplace backend late, due to loader bug
|
||||
if (!this->backend.has_value()) { // Emplace backend late, due to loader bug
|
||||
const auto& global = this->config.get().global();
|
||||
|
||||
setenv("DISABLE_LSFGVK", "1", 1);
|
||||
|
|
@ -183,23 +183,12 @@ void Root::createSwapchainContext(const vk::Vulkan& vk,
|
|||
dll = ls::findShaderDll();
|
||||
|
||||
this->backend.emplace(
|
||||
[gpu = profile.gpu](
|
||||
const std::string& deviceName,
|
||||
std::pair<const std::string&, const std::string&> ids,
|
||||
const std::optional<std::string>& pci
|
||||
) {
|
||||
if (!gpu)
|
||||
return true;
|
||||
|
||||
return (deviceName == *gpu)
|
||||
|| (ids.first + ":" + ids.second == *gpu)
|
||||
|| (pci && *pci == *gpu);
|
||||
},
|
||||
profile.gpu.value_or(""),
|
||||
dll, global.allow_fp16
|
||||
);
|
||||
} catch (const std::exception& e) {
|
||||
unsetenv("DISABLE_LSFGVK");
|
||||
throw ls::error("failed to create backend instance", e);
|
||||
throw ls::error("Failed to create backend instance", e);
|
||||
}
|
||||
|
||||
unsetenv("DISABLE_LSFGVK");
|
||||
|
|
|
|||
|
|
@ -2,13 +2,14 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "lsfg-vk-backend/lsfgvk.hpp"
|
||||
#include "lsfg-vk/lsfgvk.hpp"
|
||||
#include "lsfg-vk-common/configuration/config.hpp"
|
||||
#include "lsfg-vk-common/helpers/errors.hpp"
|
||||
#include "lsfg-vk-common/helpers/pointers.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
#include "swapchain.hpp"
|
||||
|
||||
#include <functional>
|
||||
#include <optional>
|
||||
#include <unordered_map>
|
||||
|
||||
|
|
@ -16,49 +17,75 @@
|
|||
|
||||
namespace lsfgvk::layer {
|
||||
|
||||
/// root context of the lsfg-vk layer
|
||||
///
|
||||
/// Root context of the lsfg-vk layer
|
||||
///
|
||||
class Root {
|
||||
public:
|
||||
/// create the lsfg-vk root context
|
||||
///
|
||||
/// Create the lsfg-vk root context
|
||||
///
|
||||
/// @throws ls::error on failure
|
||||
///
|
||||
Root();
|
||||
|
||||
/// check if the layer is active
|
||||
/// @return true if active
|
||||
///
|
||||
/// Check if the layer is active
|
||||
///
|
||||
/// @return true if active
|
||||
///
|
||||
[[nodiscard]] bool active() const { return this->active_profile.has_value(); }
|
||||
|
||||
/// ensure the layer is up-to-date
|
||||
/// @return true if the configuration was updated
|
||||
///
|
||||
/// Ensure the layer is up-to-date
|
||||
///
|
||||
/// @return true if the configuration was updated
|
||||
///
|
||||
bool update();
|
||||
|
||||
/// modify instance create info
|
||||
/// @param createInfo original create info
|
||||
/// @param finish function to call after modification
|
||||
///
|
||||
/// Modify instance create info
|
||||
///
|
||||
/// @param createInfo Original create info
|
||||
/// @param finish Function to call after modification
|
||||
///
|
||||
void modifyInstanceCreateInfo(VkInstanceCreateInfo& createInfo,
|
||||
const std::function<void(void)>& finish) const;
|
||||
/// modify device create info
|
||||
/// @param createInfo original create info
|
||||
/// @param finish function to call after modification
|
||||
///
|
||||
/// Modify device create info
|
||||
///
|
||||
/// @param createInfo Original create info
|
||||
/// @param finish Function to call after modification
|
||||
///
|
||||
void modifyDeviceCreateInfo(VkDeviceCreateInfo& createInfo,
|
||||
const std::function<void(void)>& finish) const;
|
||||
|
||||
/// modify swapchain create info
|
||||
/// @param vk vulkan instance
|
||||
/// @param createInfo original create info
|
||||
/// @param finish function to call after modification
|
||||
///
|
||||
/// Modify swapchain create info
|
||||
///
|
||||
/// @param vk Vulkan instance
|
||||
/// @param createInfo Original create info
|
||||
/// @param finish Function to call after modification
|
||||
///
|
||||
void modifySwapchainCreateInfo(const vk::Vulkan& vk, VkSwapchainCreateInfoKHR& createInfo,
|
||||
const std::function<void(void)>& finish) const;
|
||||
/// create swapchain context
|
||||
/// @param vk vulkan instance
|
||||
/// @param swapchain swapchain handle
|
||||
/// @param info swapchain info
|
||||
|
||||
///
|
||||
/// Create swapchain context
|
||||
///
|
||||
/// @param vk Vulkan instance
|
||||
/// @param swapchain Swapchain handle
|
||||
/// @param info Swapchain info
|
||||
/// @throws ls::error on failure
|
||||
///
|
||||
void createSwapchainContext(const vk::Vulkan& vk, VkSwapchainKHR swapchain,
|
||||
const SwapchainInfo& info);
|
||||
/// get swapchain context
|
||||
/// @param swapchain swapchain handle
|
||||
/// @return swapchain context
|
||||
///
|
||||
/// Get swapchain context
|
||||
///
|
||||
/// @param swapchain Swapchain handle
|
||||
/// @return Swapchain context
|
||||
/// @throws ls::error if not found
|
||||
///
|
||||
[[nodiscard]] Swapchain& getSwapchainContext(VkSwapchainKHR swapchain) {
|
||||
const auto& it = this->swapchains.find(swapchain);
|
||||
if (it == this->swapchains.end())
|
||||
|
|
@ -66,14 +93,17 @@ namespace lsfgvk::layer {
|
|||
|
||||
return it->second;
|
||||
}
|
||||
/// remove swapchain context
|
||||
/// @param swapchain swapchain handle
|
||||
///
|
||||
/// Remove swapchain context
|
||||
///
|
||||
/// @param swapchain Swapchain handle
|
||||
///
|
||||
void removeSwapchainContext(VkSwapchainKHR swapchain);
|
||||
private:
|
||||
ls::WatchedConfig config;
|
||||
std::optional<ls::GameConf> active_profile;
|
||||
|
||||
ls::lazy<backend::Instance> backend;
|
||||
ls::lazy<lsfgvk::Instance> backend;
|
||||
std::unordered_map<VkSwapchainKHR, Swapchain> swapchains;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
/* SPDX-License-Identifier: GPL-3.0-or-later */
|
||||
|
||||
#include "swapchain.hpp"
|
||||
#include "lsfg-vk-backend/lsfgvk.hpp"
|
||||
#include "lsfg-vk/lsfgvk.hpp"
|
||||
#include "lsfg-vk-common/configuration/config.hpp"
|
||||
#include "lsfg-vk-common/helpers/errors.hpp"
|
||||
#include "lsfg-vk-common/helpers/pointers.hpp"
|
||||
|
|
@ -10,11 +10,10 @@
|
|||
#include "lsfg-vk-common/vulkan/semaphore.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <exception>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
|
@ -25,6 +24,7 @@ using namespace lsfgvk;
|
|||
using namespace lsfgvk::layer;
|
||||
|
||||
namespace {
|
||||
/// Barrier helper
|
||||
VkImageMemoryBarrier barrierHelper(VkImage handle,
|
||||
VkAccessFlags srcAccessMask,
|
||||
VkAccessFlags dstAccessMask,
|
||||
|
|
@ -66,66 +66,45 @@ void layer::context_ModifySwapchainCreateInfo(const ls::GameConf& profile, uint3
|
|||
}
|
||||
}
|
||||
|
||||
Swapchain::Swapchain(const vk::Vulkan& vk, backend::Instance& backend,
|
||||
Swapchain::Swapchain(const vk::Vulkan& vk, lsfgvk::Instance& backend,
|
||||
ls::GameConf profile, SwapchainInfo info) :
|
||||
instance(backend),
|
||||
profile(std::move(profile)), info(std::move(info)) {
|
||||
const VkExtent2D extent = this->info.extent;
|
||||
const bool hdr = this->info.format > 57;
|
||||
|
||||
std::vector<int> sourceFds(2);
|
||||
std::vector<int> destinationFds(this->profile.multiplier - 1);
|
||||
|
||||
this->sourceImages.reserve(sourceFds.size());
|
||||
for (int& fd : sourceFds)
|
||||
this->sourceImages.emplace_back(vk,
|
||||
extent, hdr ? VK_FORMAT_R16G16B16A16_SFLOAT : VK_FORMAT_R8G8B8A8_UNORM,
|
||||
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
||||
std::nullopt, &fd);
|
||||
|
||||
this->destinationImages.reserve(destinationFds.size());
|
||||
for (int& fd : destinationFds)
|
||||
this->destinationImages.emplace_back(vk,
|
||||
extent, hdr ? VK_FORMAT_R16G16B16A16_SFLOAT : VK_FORMAT_R8G8B8A8_UNORM,
|
||||
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
||||
std::nullopt, &fd);
|
||||
|
||||
int syncFd{};
|
||||
this->syncSemaphore.emplace(vk, 0, std::nullopt, &syncFd);
|
||||
|
||||
try {
|
||||
this->ctx = ls::owned_ptr<ls::R<backend::Context>>(
|
||||
new ls::R<backend::Context>(backend.openContext(
|
||||
{ sourceFds.at(0), sourceFds.at(1) }, destinationFds, syncFd,
|
||||
extent.width, extent.height,
|
||||
hdr, 1.0F / this->profile.flow_scale, this->profile.performance_mode
|
||||
)),
|
||||
[backend = &backend](ls::R<backend::Context>& ctx) {
|
||||
backend->closeContext(ctx);
|
||||
}
|
||||
this->ctx = std::make_unique<lsfgvk::Context>(
|
||||
backend,
|
||||
extent.width, extent.height,
|
||||
this->profile.flow_scale,
|
||||
this->profile.performance_mode
|
||||
);
|
||||
|
||||
backend::makeLeaking(); // don't worry about it :3
|
||||
this->total = static_cast<uint32_t>(this->profile.multiplier) - 1;
|
||||
} catch (const std::exception& e) {
|
||||
throw ls::error("failed to create swapchain context", e);
|
||||
throw ls::error("Failed to create swapchain context", e);
|
||||
}
|
||||
|
||||
const auto exportedFds = this->ctx->exportFds();
|
||||
this->sourceImage.emplace(vk,
|
||||
extent, VK_FORMAT_R8G8B8A8_UNORM,
|
||||
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
||||
exportedFds.sourceFd, std::nullopt, 2);
|
||||
this->destinationImage.emplace(vk,
|
||||
extent, VK_FORMAT_R8G8B8A8_UNORM,
|
||||
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
|
||||
exportedFds.destinationFd);
|
||||
this->syncSemaphore.emplace(vk, 0, exportedFds.syncFd);
|
||||
|
||||
this->renderCommandBuffer.emplace(vk);
|
||||
this->renderFence.emplace(vk);
|
||||
for (size_t i = 0; i < this->destinationImages.size(); i++) {
|
||||
this->finalSemaphore.emplace(vk);
|
||||
for (size_t i = 0; i < this->total; i++) {
|
||||
this->passes.emplace_back(RenderPass {
|
||||
.commandBuffer = vk::CommandBuffer(vk),
|
||||
.acquireSemaphore = vk::Semaphore(vk)
|
||||
.acquireSemaphore = vk::Semaphore(vk),
|
||||
.copySemaphore = vk::Semaphore(vk)
|
||||
});
|
||||
}
|
||||
|
||||
const size_t frames = std::max(this->info.images.size(), this->destinationImages.size() + 2);
|
||||
for (size_t i = 0; i < frames; i++) {
|
||||
this->postCopySemaphores.emplace_back(
|
||||
vk::Semaphore(vk),
|
||||
vk::Semaphore(vk)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
VkResult Swapchain::present(const vk::Vulkan& vk,
|
||||
|
|
@ -133,19 +112,18 @@ VkResult Swapchain::present(const vk::Vulkan& vk,
|
|||
void* next_chain, uint32_t imageIdx,
|
||||
const std::vector<VkSemaphore>& semaphores) {
|
||||
const auto& swapchainImage = this->info.images.at(imageIdx);
|
||||
const auto& sourceImage = this->sourceImages.at(this->fidx % 2);
|
||||
const auto sourceImageIdx{static_cast<uint32_t>(this->iteration) % 2};
|
||||
|
||||
// schedule frame generation
|
||||
// Schedule frame generation
|
||||
try {
|
||||
this->instance.get().scheduleFrames(this->ctx.get());
|
||||
this->ctx->dispatch(this->total);
|
||||
} catch (const std::exception& e) {
|
||||
throw ls::error("failed to schedule frames", e);
|
||||
throw ls::error("Failed to schedule frames", e);
|
||||
}
|
||||
|
||||
// update present mode when not using pacing
|
||||
// Update present mode when not using pacing
|
||||
if (this->profile.pacing == ls::Pacing::None) {
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wunknown-warning-option"
|
||||
#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
|
||||
auto* info = reinterpret_cast<VkSwapchainPresentModeInfoEXT*>(next_chain);
|
||||
while (info) {
|
||||
|
|
@ -160,12 +138,12 @@ VkResult Swapchain::present(const vk::Vulkan& vk,
|
|||
#pragma clang diagnostic pop
|
||||
}
|
||||
|
||||
// wait for completion of previous frame
|
||||
if (this->fidx && !this->renderFence->wait(vk, 150ULL * 1000 * 1000))
|
||||
// Wait for completion of previous frame
|
||||
if (this->iteration && !this->renderFence->wait(vk, 150ULL * 1000 * 1000))
|
||||
throw ls::vulkan_error(VK_TIMEOUT, "vkWaitForFences() failed");
|
||||
this->renderFence->reset(vk);
|
||||
|
||||
// copy swapchain image into backend source image
|
||||
// Copy swapchain image into backend source image
|
||||
const auto& cmdbuf = *this->renderCommandBuffer;
|
||||
cmdbuf.begin(vk);
|
||||
|
||||
|
|
@ -177,15 +155,15 @@ VkResult Swapchain::present(const vk::Vulkan& vk,
|
|||
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
|
||||
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
|
||||
),
|
||||
barrierHelper(sourceImage.handle(),
|
||||
barrierHelper(this->sourceImage->handle(),
|
||||
VK_ACCESS_NONE,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
VK_IMAGE_LAYOUT_UNDEFINED,
|
||||
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
|
||||
),
|
||||
},
|
||||
{ swapchainImage, sourceImage.handle() },
|
||||
sourceImage.getExtent(),
|
||||
{ swapchainImage, this->sourceImage->handle() },
|
||||
this->sourceImage->getExtent(),
|
||||
{
|
||||
barrierHelper(swapchainImage,
|
||||
VK_ACCESS_TRANSFER_READ_BIT,
|
||||
|
|
@ -193,39 +171,40 @@ VkResult Swapchain::present(const vk::Vulkan& vk,
|
|||
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
|
||||
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR
|
||||
),
|
||||
}
|
||||
},
|
||||
0, sourceImageIdx
|
||||
);
|
||||
|
||||
cmdbuf.end(vk);
|
||||
|
||||
cmdbuf.submit(vk,
|
||||
semaphores, VK_NULL_HANDLE, 0,
|
||||
{}, this->syncSemaphore->handle(), this->idx++
|
||||
{}, this->syncSemaphore->handle(), this->syncValue
|
||||
);
|
||||
|
||||
for (size_t i = 0; i < this->destinationImages.size(); i++) {
|
||||
auto& pcs = this->postCopySemaphores.at(this->idx % this->postCopySemaphores.size());
|
||||
auto& destinationImage = this->destinationImages.at(i);
|
||||
auto& pass = this->passes.at(i);
|
||||
for (size_t i = 0; i < this->passes.size(); i++) {
|
||||
auto& pass{this->passes.at(i)};
|
||||
const bool last{i == (this->passes.size() - 1)};
|
||||
|
||||
// acquire swapchain image
|
||||
uint32_t aqImageIdx{};
|
||||
// Acquire swapchain image
|
||||
uint32_t swapchainImageIdx{};
|
||||
auto res = vk.df().AcquireNextImageKHR(vk.dev(), swapchain,
|
||||
UINT64_MAX, pass.acquireSemaphore.handle(),
|
||||
VK_NULL_HANDLE,
|
||||
&aqImageIdx
|
||||
&swapchainImageIdx
|
||||
);
|
||||
if (res != VK_SUCCESS && res != VK_SUBOPTIMAL_KHR)
|
||||
throw ls::vulkan_error(res, "vkAcquireNextImageKHR() failed");
|
||||
|
||||
const auto& aquiredSwapchainImage = this->info.images.at(aqImageIdx);
|
||||
const auto& aquiredSwapchainImage = this->info.images.at(swapchainImageIdx);
|
||||
|
||||
// copy backend destination image into swapchain image
|
||||
// Copy backend destination image into swapchain image
|
||||
auto& cmdbuf = pass.commandBuffer;
|
||||
cmdbuf.begin(vk);
|
||||
|
||||
cmdbuf.blitImage(vk,
|
||||
{
|
||||
barrierHelper(destinationImage.handle(),
|
||||
barrierHelper(this->destinationImage->handle(),
|
||||
VK_ACCESS_NONE,
|
||||
VK_ACCESS_TRANSFER_READ_BIT,
|
||||
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
|
||||
|
|
@ -238,8 +217,8 @@ VkResult Swapchain::present(const vk::Vulkan& vk,
|
|||
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
|
||||
),
|
||||
},
|
||||
{ destinationImage.handle(), aquiredSwapchainImage },
|
||||
destinationImage.getExtent(),
|
||||
{ this->destinationImage->handle(), aquiredSwapchainImage },
|
||||
this->destinationImage->getExtent(),
|
||||
{
|
||||
barrierHelper(aquiredSwapchainImage,
|
||||
VK_ACCESS_TRANSFER_WRITE_BIT,
|
||||
|
|
@ -250,48 +229,43 @@ VkResult Swapchain::present(const vk::Vulkan& vk,
|
|||
}
|
||||
);
|
||||
|
||||
std::vector<VkSemaphore> waitSemaphores{ pass.acquireSemaphore.handle() };
|
||||
if (i) { // non-first pass
|
||||
const auto& prevPCS = this->postCopySemaphores.at((this->idx - 1) % this->postCopySemaphores.size());
|
||||
waitSemaphores.push_back(prevPCS.second.handle());
|
||||
}
|
||||
|
||||
const std::vector<VkSemaphore> signalSemaphores{
|
||||
pcs.first.handle(),
|
||||
pcs.second.handle()
|
||||
};
|
||||
|
||||
cmdbuf.end(vk);
|
||||
|
||||
std::vector<VkSemaphore> signalSemaphores{ pass.copySemaphore.handle() };
|
||||
if (last)
|
||||
signalSemaphores.push_back(this->finalSemaphore->handle());
|
||||
|
||||
this->syncValue++;
|
||||
|
||||
cmdbuf.submit(vk,
|
||||
waitSemaphores, this->syncSemaphore->handle(), this->idx,
|
||||
signalSemaphores, VK_NULL_HANDLE, 0,
|
||||
i == this->destinationImages.size() - 1 ? this->renderFence->handle() : VK_NULL_HANDLE
|
||||
{ pass.acquireSemaphore.handle() }, this->syncSemaphore->handle(), this->syncValue,
|
||||
signalSemaphores, last ? nullptr : this->syncSemaphore->handle(), this->syncValue + 1,
|
||||
last ? this->renderFence->handle() : VK_NULL_HANDLE
|
||||
);
|
||||
|
||||
// present swapchain image
|
||||
this->syncValue++;
|
||||
|
||||
// Present swapchain image
|
||||
const VkPresentInfoKHR presentInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
|
||||
.pNext = i ? nullptr : next_chain,
|
||||
.waitSemaphoreCount = 1,
|
||||
.pWaitSemaphores = &pcs.first.handle(),
|
||||
.pWaitSemaphores = &pass.copySemaphore.handle(),
|
||||
.swapchainCount = 1,
|
||||
.pSwapchains = &swapchain,
|
||||
.pImageIndices = &aqImageIdx,
|
||||
.pImageIndices = &swapchainImageIdx,
|
||||
};
|
||||
res = vk.df().QueuePresentKHR(queue,
|
||||
&presentInfo);
|
||||
if (res != VK_SUCCESS && res != VK_SUBOPTIMAL_KHR)
|
||||
throw ls::vulkan_error(res, "vkQueuePresentKHR() failed");
|
||||
|
||||
this->idx++;
|
||||
}
|
||||
|
||||
// present original swapchain image
|
||||
auto& lastPCS = this->postCopySemaphores.at((this->idx - 1) % this->postCopySemaphores.size());
|
||||
// Present original swapchain image
|
||||
const VkPresentInfoKHR presentInfo{
|
||||
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
|
||||
.waitSemaphoreCount = 1,
|
||||
.pWaitSemaphores = &lastPCS.second.handle(),
|
||||
.pWaitSemaphores = &this->finalSemaphore->handle(),
|
||||
.swapchainCount = 1,
|
||||
.pSwapchains = &swapchain,
|
||||
.pImageIndices = &imageIdx,
|
||||
|
|
@ -300,6 +274,7 @@ VkResult Swapchain::present(const vk::Vulkan& vk,
|
|||
if (res != VK_SUCCESS && res != VK_SUBOPTIMAL_KHR)
|
||||
throw ls::vulkan_error(res, "vkQueuePresentKHR() failed");
|
||||
|
||||
this->fidx++;
|
||||
this->iteration++;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "lsfg-vk-backend/lsfgvk.hpp"
|
||||
#include "lsfg-vk/lsfgvk.hpp"
|
||||
#include "lsfg-vk-common/configuration/config.hpp"
|
||||
#include "lsfg-vk-common/helpers/pointers.hpp"
|
||||
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
|
||||
|
|
@ -12,15 +12,18 @@
|
|||
#include "lsfg-vk-common/vulkan/timeline_semaphore.hpp"
|
||||
#include "lsfg-vk-common/vulkan/vulkan.hpp"
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <utility>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include <vulkan/vulkan_core.h>
|
||||
|
||||
namespace lsfgvk::layer {
|
||||
|
||||
/// swapchain info struct
|
||||
///
|
||||
/// Swapchain info struct
|
||||
///
|
||||
struct SwapchainInfo {
|
||||
std::vector<VkImage> images;
|
||||
VkFormat format;
|
||||
|
|
@ -29,53 +32,67 @@ namespace lsfgvk::layer {
|
|||
VkPresentModeKHR presentMode;
|
||||
};
|
||||
|
||||
/// modify the swapchain create info based on the profile pre-swapchain creation
|
||||
/// @param profile active game profile
|
||||
/// @param maxImages maximum number of images supported by the surface
|
||||
/// @param createInfo swapchain create info to modify
|
||||
///
|
||||
/// Modify the swapchain create info based on the profile pre-swapchain creation
|
||||
///
|
||||
/// @param profile Active game profile
|
||||
/// @param maxImages Maximum number of images supported by the surface
|
||||
/// @param createInfo Swapchain create info to modify
|
||||
///
|
||||
void context_ModifySwapchainCreateInfo(const ls::GameConf& profile, uint32_t maxImages,
|
||||
VkSwapchainCreateInfoKHR& createInfo);
|
||||
|
||||
/// swapchain context for a layer instance
|
||||
///
|
||||
/// Swapchain context for a layer instance
|
||||
///
|
||||
class Swapchain {
|
||||
public:
|
||||
/// create a new swapchain context
|
||||
/// @param vk vulkan instance
|
||||
///
|
||||
/// Create a new swapchain context
|
||||
///
|
||||
/// @param vk Vulkan instance
|
||||
/// @param backend lsfg-vk backend instance
|
||||
/// @param profile active game profile
|
||||
/// @param info swapchain info
|
||||
Swapchain(const vk::Vulkan& vk, backend::Instance& backend,
|
||||
/// @param profile Active game profile
|
||||
/// @param info Swapchain info
|
||||
///
|
||||
Swapchain(const vk::Vulkan& vk, lsfgvk::Instance& backend,
|
||||
ls::GameConf profile, SwapchainInfo info);
|
||||
|
||||
/// present a frame
|
||||
/// @param vk vulkan instance
|
||||
/// @param queue presentation queue
|
||||
/// @param next_chain next chain pointer for the present info (WARN: shared!)
|
||||
/// @param imageIdx swapchain image index to present to
|
||||
/// @param semaphores semaphores to wait on before presenting
|
||||
/// @throws ls::vulkan_error on vulkan errors
|
||||
///
|
||||
/// Present a frame
|
||||
///
|
||||
/// @param vk Vulkan instance
|
||||
/// @param queue Presentation queue
|
||||
/// @param next_chain Next chain pointer for the present info (WARNING: shared!)
|
||||
/// @param imageIdx Swapchain image index to present to
|
||||
/// @param semaphores Semaphores to wait on before presenting
|
||||
/// @throws ls::vulkan_error on Vulkan error
|
||||
///
|
||||
VkResult present(const vk::Vulkan& vk,
|
||||
VkQueue queue, VkSwapchainKHR swapchain,
|
||||
void* next_chain, uint32_t imageIdx,
|
||||
const std::vector<VkSemaphore>& semaphores);
|
||||
private:
|
||||
std::vector<vk::Image> sourceImages;
|
||||
std::vector<vk::Image> destinationImages;
|
||||
ls::lazy<vk::Image> sourceImage;
|
||||
ls::lazy<vk::Image> destinationImage;
|
||||
ls::lazy<vk::TimelineSemaphore> syncSemaphore;
|
||||
|
||||
ls::lazy<vk::CommandBuffer> renderCommandBuffer;
|
||||
ls::lazy<vk::Fence> renderFence;
|
||||
ls::lazy<vk::Semaphore> finalSemaphore;
|
||||
/// Per-pass resources used by present(); one entry is stored in `passes` per pass
struct RenderPass {
|
||||
/// Records the blit of the destination image into the acquired swapchain image
vk::CommandBuffer commandBuffer;
|
||||
/// Passed to vkAcquireNextImageKHR, then waited on by this pass's submit
vk::Semaphore acquireSemaphore;
|
||||
/// Signaled by this pass's submit and waited on by its vkQueuePresentKHR call
vk::Semaphore copySemaphore;
|
||||
};
|
||||
std::vector<RenderPass> passes;
|
||||
std::vector<std::pair<vk::Semaphore, vk::Semaphore>> postCopySemaphores;
|
||||
|
||||
ls::R<backend::Instance> instance;
|
||||
ls::owned_ptr<ls::R<backend::Context>> ctx;
|
||||
size_t idx{1};
|
||||
size_t fidx{0}; // real frame index
|
||||
ls::R<lsfgvk::Instance> instance;
|
||||
std::unique_ptr<lsfgvk::Context> ctx;
|
||||
uint32_t total{};
|
||||
|
||||
size_t iteration{0};
|
||||
size_t syncValue{1};
|
||||
|
||||
ls::GameConf profile;
|
||||
SwapchainInfo info;
|
||||
|
|
|
|||
|
|
@ -25,3 +25,5 @@ Checks:
|
|||
- -portability-avoid-pragma-once
|
||||
# Qt requires use of raw pointers in many places
|
||||
- -cppcoreguidelines-owning-memory
|
||||
# Qt seems to break some ranges algorithms in GCC
|
||||
- -modernize-use-ranges
|
||||
|
|
|
|||
|
|
@ -28,15 +28,13 @@ set_target_properties(lsfg-vk-ui PROPERTIES
|
|||
AUTOUIC ON)
|
||||
|
||||
target_compile_options(lsfg-vk-ui PRIVATE # QT-codegen warnings
|
||||
-Wno-unknown-warning-option
|
||||
-Wno-ctad-maybe-unsupported
|
||||
-Wno-unsafe-buffer-usage-in-libc-call
|
||||
-Wno-global-constructors
|
||||
-Wno-unsafe-buffer-usage)
|
||||
-Wno-unsafe-buffer-usage
|
||||
-Wno-global-constructors)
|
||||
|
||||
target_link_libraries(lsfg-vk-ui
|
||||
PRIVATE lsfg-vk-common
|
||||
PRIVATE lsfg-vk-backend
|
||||
PRIVATE Qt6::Quick)
|
||||
|
||||
install(TARGETS lsfg-vk-ui
|
||||
|
|
|
|||
|
|
@ -145,12 +145,12 @@ ApplicationWindow {
|
|||
|
||||
GroupEntry {
|
||||
title: "Path to Lossless Scaling"
|
||||
description: "Change the location of Lossless.dll"
|
||||
description: "Change the location of lsfg-vk.dll"
|
||||
|
||||
FileEdit {
|
||||
Layout.fillWidth: true
|
||||
|
||||
title: "Select Lossless.dll"
|
||||
title: "Select lsfg-vk.dll"
|
||||
filter: "Dynamic Link Library Files (*.dll)"
|
||||
|
||||
text: backend.dll
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ using namespace lsfgvk;
|
|||
using namespace lsfgvk::ui;
|
||||
|
||||
Backend::Backend() {
|
||||
// load configuration
|
||||
// Load existing configuration
|
||||
ls::ConfigFile config{};
|
||||
|
||||
auto path = ls::findConfigurationFile();
|
||||
|
|
@ -27,7 +27,8 @@ Backend::Backend() {
|
|||
try {
|
||||
config = ls::ConfigFile(path);
|
||||
} catch (const std::exception&) {
|
||||
std::cerr << "the configuration file is invalid, it has been backed up to '.old'\n";
|
||||
std::cerr << "The existing configuration file is invalid, "
|
||||
<< "it has been backed up to '.old'\n";
|
||||
std::filesystem::rename(path, path.string() + ".old");
|
||||
}
|
||||
}
|
||||
|
|
@ -35,17 +36,17 @@ Backend::Backend() {
|
|||
this->m_global = config.global();
|
||||
this->m_profiles = config.profiles();
|
||||
|
||||
// create gpu list
|
||||
this->m_gpu_list = ui::getAvailableGPUs();
|
||||
// Create GPU list
|
||||
this->m_gpu_list = ui::queryGPUs();
|
||||
|
||||
// create profile list model
|
||||
// Create profile list model
|
||||
QStringList profiles;
|
||||
for (const auto& profile : this->m_profiles)
|
||||
profiles.append(QString::fromStdString(profile.name));
|
||||
|
||||
this->m_profile_list_model = new QStringListModel(profiles, this);
|
||||
|
||||
// create active_in list models
|
||||
// Create active_in list models
|
||||
this->m_active_in_list_models.reserve(this->m_profiles.size());
|
||||
for (const auto& profile : this->m_profiles) {
|
||||
QStringList active_in;
|
||||
|
|
@ -55,11 +56,11 @@ Backend::Backend() {
|
|||
this->m_active_in_list_models.push_back(new QStringListModel(active_in, this));
|
||||
}
|
||||
|
||||
// try to select first profile
|
||||
// Try to select first profile
|
||||
if (!this->m_profiles.empty())
|
||||
this->m_profile_index = 0;
|
||||
|
||||
// spawn saving thread
|
||||
// Spawn saving thread
|
||||
std::thread([this, path]() {
|
||||
while (true) {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(500));
|
||||
|
|
@ -74,10 +75,10 @@ Backend::Backend() {
|
|||
try {
|
||||
std::filesystem::create_directories(path.parent_path());
|
||||
if (!std::filesystem::exists(path.parent_path()))
|
||||
throw ls::error("unable to create configuration directory");
|
||||
throw ls::error("Unable to create configuration directory");
|
||||
config.write(path);
|
||||
} catch (const std::exception& e) {
|
||||
std::cerr << "unable to write configuration:\n- " << e.what() << "\n";
|
||||
std::cerr << "Unable to write configuration:\n- " << e.what() << "\n";
|
||||
}
|
||||
}
|
||||
}).detach();
|
||||
|
|
|
|||
|
|
@ -9,14 +9,18 @@
|
|||
#include "lsfg-vk-common/configuration/config.hpp"
|
||||
|
||||
#include <atomic>
|
||||
#include <cstddef>
|
||||
#include <optional>
|
||||
#include <stdexcept>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#define getters public
|
||||
#define setters public
|
||||
|
||||
namespace lsfgvk::ui {
|
||||
|
||||
/// Class tying ui and configuration together
|
||||
/// Class tying UI and Configuration together
|
||||
class Backend : public QObject {
|
||||
Q_OBJECT
|
||||
|
||||
|
|
|
|||
|
|
@ -5,63 +5,100 @@
|
|||
#include <QString>
|
||||
|
||||
#include "utils.hpp"
|
||||
#include "lsfg-vk-backend/lsfgvk.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <optional>
|
||||
#include <stdexcept>
|
||||
#include <cstddef>
|
||||
#include <iomanip>
|
||||
#include <ios>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
|
||||
#define VULKAN_HPP_NO_DEFAULT_DISPATCHER 1
|
||||
#define VULKAN_HPP_NO_CONSTRUCTORS 1
|
||||
#include <vulkan/vulkan.hpp>
|
||||
|
||||
using namespace lsfgvk;
|
||||
using namespace lsfgvk::ui;
|
||||
|
||||
QStringList ui::getAvailableGPUs() {
|
||||
// list of found GPUs and their optional PCI IDs
|
||||
std::vector<std::pair<std::string, std::optional<std::string>>> gpus{};
|
||||
QStringList ui::queryGPUs() {
|
||||
// Create a Vulkan instance
|
||||
vk::detail::DispatchLoaderDynamic dld;
|
||||
dld.init();
|
||||
|
||||
// create a backend to query all GPUs
|
||||
try {
|
||||
const backend::DevicePicker picker{[&gpus](
|
||||
const std::string& deviceName,
|
||||
std::pair<const std::string&, const std::string&>,
|
||||
const std::optional<std::string>& pci
|
||||
) {
|
||||
gpus.emplace_back(deviceName, pci);
|
||||
return false; // always fail
|
||||
}};
|
||||
const vk::ApplicationInfo appInfo{
|
||||
.pApplicationName = "lsfg-vk-ui",
|
||||
.applicationVersion = vk::makeVersion(2, 0, 0),
|
||||
.pEngineName = "lsfg-vk-ui",
|
||||
.engineVersion = vk::makeVersion(2, 0, 0),
|
||||
.apiVersion = vk::ApiVersion12 // Required by lsfg-vk anyways
|
||||
};
|
||||
const vk::InstanceCreateInfo instanceInfo{
|
||||
.pApplicationInfo = &appInfo
|
||||
};
|
||||
const vk::UniqueInstance instance{vk::createInstanceUnique(instanceInfo, nullptr, dld)};
|
||||
dld.init(*instance);
|
||||
|
||||
const backend::Instance instance{picker, "/non/existent/path", false};
|
||||
throw std::runtime_error("???");
|
||||
} catch (const backend::error&) { // NOLINT (empty catch)
|
||||
// expected
|
||||
// Query physical devices
|
||||
std::vector<std::string> devicesByName{};
|
||||
std::vector<std::string> devicesByBusId{};
|
||||
|
||||
for (const auto& physdev : instance->enumeratePhysicalDevices(dld)) {
|
||||
// Check for VK_EXT_pci_bus_info
|
||||
bool supportsPCIEXT{false};
|
||||
for (const auto& ext : physdev.enumerateDeviceExtensionProperties(nullptr, dld)) {
|
||||
if (std::string(ext.extensionName) != vk::EXTPciBusInfoExtensionName)
|
||||
continue;
|
||||
|
||||
supportsPCIEXT = true;
|
||||
break;
|
||||
}
|
||||
|
||||
// Fetch properties
|
||||
vk::PhysicalDevicePCIBusInfoPropertiesEXT busInfo{};
|
||||
vk::PhysicalDeviceProperties2 info{
|
||||
.pNext = supportsPCIEXT ? &busInfo : nullptr
|
||||
};
|
||||
physdev.getProperties2(&info, dld);
|
||||
|
||||
auto& props{info.properties};
|
||||
|
||||
// Append device name
|
||||
props.deviceName.back() = '\0'; // Ensure null-termination
|
||||
devicesByName.emplace_back(props.deviceName);
|
||||
|
||||
// Append PCI bus ID
|
||||
if (!supportsPCIEXT)
|
||||
continue;
|
||||
|
||||
std::ostringstream pciss;
|
||||
pciss << std::hex << std::setfill('0')
|
||||
<< std::setw(4) << busInfo.pciDomain << ":"
|
||||
<< std::setw(2) << busInfo.pciBus << ":"
|
||||
<< std::setw(2) << busInfo.pciDevice << "."
|
||||
<< std::setw(1) << busInfo.pciFunction;
|
||||
devicesByBusId.emplace_back(pciss.str());
|
||||
}
|
||||
|
||||
// NOLINTBEGIN (ranges) [GCC has some issues with ranges]
|
||||
// first remove 1:1 duplicates
|
||||
std::sort(gpus.begin(), gpus.end());
|
||||
gpus.erase(std::unique(gpus.begin(), gpus.end()), gpus.end());
|
||||
// NOLINTEND
|
||||
// Count duplicate names
|
||||
std::unordered_map<std::string, size_t> repeats{};
|
||||
for (const auto& name : devicesByName)
|
||||
repeats[name]++;
|
||||
|
||||
// build the frontend list
|
||||
// Build the frontend list
|
||||
QStringList list{"Default"};
|
||||
for (const auto& gpu : gpus) {
|
||||
// check if GPU is in list more than once
|
||||
auto count = std::count_if(gpus.begin(), gpus.end(),
|
||||
[&gpu](const auto& other) {
|
||||
return other.first == gpu.first;
|
||||
}
|
||||
);
|
||||
for (size_t i = 0; i < devicesByName.size(); i++) {
|
||||
const auto& name{devicesByName.at(i)};
|
||||
|
||||
// add pci id to distinguish, otherwise add just the name
|
||||
// Decide whether to show PCI bus ID or device name
|
||||
QString entry;
|
||||
if (count > 1 && gpu.second.has_value())
|
||||
entry = QString::fromStdString(*gpu.second);
|
||||
if (repeats[name] > 1)
|
||||
entry = QString::fromStdString(devicesByBusId.at(i));
|
||||
else
|
||||
entry = QString::fromStdString(gpu.first);
|
||||
entry = QString::fromStdString(name);
|
||||
|
||||
// ensure no duplicates (flatpak does funny things)
|
||||
// Append to list if not already present (flatpak does funny things)
|
||||
if (list.contains(entry))
|
||||
continue;
|
||||
list.append(entry);
|
||||
|
|
|
|||
|
|
@ -6,9 +6,11 @@
|
|||
|
||||
namespace lsfgvk::ui {
|
||||
|
||||
/// get the list of available GPUs, automatically
|
||||
/// switching to PCI IDs if there are duplicates
|
||||
/// @return list of available GPUs
|
||||
QStringList getAvailableGPUs();
|
||||
///
|
||||
/// Query all GPUs available on the system.
|
||||
///
|
||||
/// @return List of available GPUs
|
||||
///
|
||||
QStringList queryGPUs();
|
||||
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue