This commit is contained in:
PancakeTAS 2026-05-09 18:16:46 +00:00 committed by GitHub
commit d4b41c9008
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
84 changed files with 5585 additions and 3221 deletions

View file

@ -6,14 +6,14 @@ include(GNUInstallDirs)
option(LSFGVK_BUILD_VK_LAYER "Build the Vulkan layer" ON)
option(LSFGVK_BUILD_UI "Build the user interface" OFF)
option(LSFGVK_BUILD_CLI "Build the command line interface" ON)
option(LSFGVK_INSTALL_DEVELOP "Install development libraries and headers" OFF)
option(LSFGVK_INSTALL_LIBRARIES "Install development libraries and headers" OFF)
option(LSFGVK_INSTALL_XDG_FILES "Install the application icon and desktop files" OFF)
set(LSFGVK_LAYER_LIBRARY_PATH liblsfg-vk-layer.so CACHE STRING "Change where Vulkan searches for the layer library")
option(LSFGVK_TESTING_RENDERDOC "Enable RenderDoc integration for testing purposes" OFF)
# === READ HERE FOR BUILD OPTIONS ===
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_VISIBILITY_PRESET hidden)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
set(CMAKE_SKIP_RPATH ON)
@ -29,24 +29,20 @@ if(CMAKE_BUILD_TYPE STREQUAL "Debug")
add_compile_options(
# By default, enable all warnings
-Weverything
-Wno-unknown-warning-option
# Some warnings are incompatible with each other
-Wno-pre-c++20-compat-pedantic
-Wno-c++98-compat-pedantic
-Wno-switch-default
# Then there's code-style things I don't care about
-Wno-missing-designated-field-initializers
-Wno-shadow
-Wno-unused-macros
# And functional warning I don't care about either
-Wno-cast-function-type-strict
-Wno-padded
)
endif()
endif()
if(LSFGVK_TESTING_RENDERDOC)
add_compile_definitions(LSFGVK_TESTING_RENDERDOC)
endif()
add_subdirectory(lsfg-vk-common)
add_subdirectory(lsfg-vk-backend)
if(LSFGVK_BUILD_VK_LAYER)

View file

@ -76,7 +76,7 @@ However, lsfg-vk provides several CMake options to customize the build process:
- `LSFGVK_BUILD_VK_LAYER`: Set to `On` to build the Vulkan layer (default is `On`).
- `LSFGVK_BUILD_UI`: Set to `On` to build the user interface (default is `Off`).
- `LSFGVK_BUILD_CLI`: Set to `On` to build the command-line interface (default is `On`).
- `LSFGVK_INSTALL_DEVELOP`: Set to `On` to install development files like headers and libraries (default is `Off`).
- `LSFGVK_INSTALL_LIBRARIES`: Set to `On` to install development files like headers and libraries (default is `Off`).
- `LSFGVK_INSTALL_XDG_FILES`: Set to `On` to install XDG desktop files and icons (default is `Off`).
- `LSFGVK_LAYER_LIBRARY_PATH`: Override the path to the Vulkan layer library (by default, Vulkan will search the systems library path).

View file

@ -10,7 +10,7 @@ Regardless of the method you choose, the concept of profiles remains the same.
### All Configuration Options
Below is a list of all available **global** configuration options:
- **Path to Lossless Scaling / `dll`**: By default, lsfg-vk will search certain directories for Lossless Scaling. If you have Lossless Scaling installed in a custom location, you can specify the full path to the "Lossless.dll" file inside of Lossless Scaling here.
- **Path to Lossless Scaling / `dll`**: By default, lsfg-vk will search certain directories for Lossless Scaling. If you have Lossless Scaling installed in a custom location, you can specify the full path to the "lsfg-vk.dll" file inside of Lossless Scaling here.
- **Allow half-precision / `allow_fp16`**: If enabled, this will allow lsfg-vk to take advantage of half-precision shader operations if supported by the GPU. This has a giant performance uplift on AMD GPUs, but does not affect NVIDIA GPUs (GTX 1000-series or older cards will actually see a big performance **decrease**). This option **does not** influence quality. (Default: `true`)
Next is a list of all available **profile** configuration options:
@ -18,12 +18,12 @@ Next is a list of all available **profile** configuration options:
- **Active In / `active_in`**: A list of 1) linux binary names, such as `mpv`, 2) windows executables, such as `GenshinImpact.exe` and 3) process names, such as `GameThread`. It is also possible to specify the last part of a path (e.g. `Ghostrunner2/Binaries/Win64/Ghostrunner2-Win64-Shipping.exe`). When a process matching one of these rules is detected, this profile will be activated.
- **Multiplier / `multiplier`**: The frame generation multiplier. A value of 3 means that for every frame rendered by the application, lsfg-vk will generate 2 additional frames. (Default: `2`)
- **Flow Scale / `flow_scale`**: The resolution scale at which the motion vectors are calculated. A lower value means better performance, but worse quality. (Default: `1.0`)
- **Performance Mode / `performance_mode`**: When enabled, a significantly lighter frame generation model is used. This has a minor quality impact, but greatly improves performance. (Default: `false`)
- **Pacing Mode / `pacing`**: This option is explained in greater detail below. Supported values are **None / `none`**.
- **GPU / `gpu`**: The GPU to use for frame generation. This MUST be the **same GPU** as the one being used by the application. **Dual GPU is NOT supported**. You can identify a GPU through its name (e.g. `NVIDIA GeForce RTX 3080`), uppercase-only ID (e.g. `0x10DE:0x2C02`) or PCI bus ID (e.g. `3:0.0`). If not specified, the primary GPU will be used, which may lead to issues.
The "Multiplier", "Flow Scale" and "Performance Mode" options can be **hot-reloaded**, meaning that changes to these options will take effect immediately without needing to restart the application. Options such as "Pacing Mode" or removal of the profile require a swapchain recreation, which usually means resizing or restarting the application. Any other change requires an application restart.
### Pacing Modes
@ -45,7 +45,7 @@ The following environment variables affect lsfg-vk:
- `LSFGVK_PROFILE`: Name of the profile to use. If set, this will override automatic profile detection.
If you do not wish to use a configuration file, you can also set configuration options through environment variables. To do this, set `LSFGVK_ENV=1` and then any of the following variables:
- `LSFGVK_DLL_PATH`: Path to Lossless Scaling DLL.
- `LSFGVK_DLL_PATH`: Path to lsfg-vk DLL.
- `LSFGVK_NO_FP16`: If set to `1`, half-precision will be disabled.
- `LSFGVK_MULTIPLIER`: Frame generation multiplier.
- `LSFGVK_FLOW_SCALE`: Flow scale value.

View file

@ -23,8 +23,3 @@ Checks:
- -cppcoreguidelines-macro-usage
- -bugprone-easily-swappable-parameters
- -portability-avoid-pragma-once
# Vulkan requires the use of reinterpret/const casts in many places
- -cppcoreguidelines-pro-type-reinterpret-cast
- -cppcoreguidelines-pro-type-const-cast
# We use namespace forward declarations
- -bugprone-forward-declaration-namespace

View file

@ -1,33 +1,23 @@
set(BACKEND_SOURCES
"src/extraction/dll_reader.cpp"
"src/extraction/shader_registry.cpp"
"src/helpers/limits.cpp"
"src/helpers/managed_shader.cpp"
"src/helpers/utils.cpp"
"src/shaderchains/alpha0.cpp"
"src/shaderchains/alpha1.cpp"
"src/shaderchains/beta0.cpp"
"src/shaderchains/beta1.cpp"
"src/shaderchains/delta0.cpp"
"src/shaderchains/delta1.cpp"
"src/shaderchains/gamma0.cpp"
"src/shaderchains/gamma1.cpp"
"src/shaderchains/generate.cpp"
"src/shaderchains/mipmaps.cpp"
"src/modules/library/dll.cpp"
"src/modules/library.cpp"
"src/modules/pipeline.cpp"
"src/utility/pipelines.cpp"
"src/utility/vkhelper.cpp"
"src/lsfgvk.cpp")
add_library(lsfg-vk-backend STATIC ${BACKEND_SOURCES})
target_include_directories(lsfg-vk-backend
PUBLIC include)
PUBLIC include
PRIVATE src)
target_link_libraries(lsfg-vk-backend
PUBLIC lsfg-vk-common)
target_compile_options(lsfg-vk-backend PUBLIC
$<$<CXX_COMPILER_ID:Clang>:-fconstexpr-steps=4290000000>
$<$<CXX_COMPILER_ID:GNU>:-fconstexpr-ops-limit=4290000000>
)
set_target_properties(lsfg-vk-backend PROPERTIES
CXX_VISIBILITY_PRESET hidden)
if(LSFGVK_INSTALL_DEVELOP)
if(LSFGVK_INSTALL_LIBRARIES)
install(TARGETS lsfg-vk-backend
ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")
install(DIRECTORY "include/lsfg-vk-backend/"

View file

@ -1,143 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include <exception>
#include <filesystem>
#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <utility>
#include <vector>
namespace lsfgvk::backend {
// Implementation classes (pimpl); Context is an alias so the public API
// only ever exposes the opaque type.
class [[gnu::visibility("default")]] ContextImpl;
class [[gnu::visibility("default")]] InstanceImpl;
using Context = ContextImpl;
///
/// Primitive exception class that delivers a detailed error message
///
class [[gnu::visibility("default")]] error : public std::runtime_error {
public:
///
/// Construct an error
///
/// @param msg Error message.
/// @param inner Inner exception.
///
explicit error(const std::string &msg, const std::exception &inner);
///
/// Construct an error
///
/// @param msg Error message.
///
explicit error(const std::string &msg);
// Copyable and movable; defaults are correct (no owned resources).
error(const error &) = default;
error &operator=(const error &) = default;
error(error &&) = default;
error &operator=(error &&) = default;
~error() override;
};
/// Function type for picking a device based on its name and IDs.
/// Invoked once per candidate device; return true to select that device.
using DevicePicker = std::function<bool(
const std::string& deviceName,
std::pair<const std::string&, const std::string&> ids, // (vendor ID, device ID) 0xXXXX format
const std::optional<std::string>& pci // (bus:slot.func) if available, no padded zeros
)>;
///
/// Main entry point of the library
///
class [[gnu::visibility("default")]] Instance {
public:
///
/// Create a lsfg-vk instance
///
/// @param devicePicker Function that picks a physical device based on some identifiers.
/// @param shaderDllPath Path to the Lossless.dll file to load shaders from.
/// @param allowLowPrecision Whether to load low-precision (FP16) shaders if supported.
///
/// @throws backend::error on failure
///
Instance(
const DevicePicker& devicePicker,
const std::filesystem::path& shaderDllPath,
bool allowLowPrecision
);
///
/// Open a frame generation context.
///
/// The VkFormat of the exchanged images is inferred from whether hdr is true or false:
/// - false: VK_FORMAT_R8G8B8A8_UNORM
/// - true: VK_FORMAT_R16G16B16A16_SFLOAT
///
/// The application and library must keep track of the frame index. When the next frame
/// is ready, signal the syncFd with one increment (with the first trigger being 1).
/// Each generated frame will increment the semaphore by one:
/// - Application signals 1 -> Start generating with (curr, next) source images
/// - Library signals 1 -> First frame between (curr, next) is ready
/// - Library signals N -> N-th frame between (curr, next) is ready
/// - Application signals N+1 -> Start generating with (next, curr) source images
///
/// @param sourceFds Pair of file descriptors for the source images alternated between.
/// @param destFds Vector with file descriptors to import output images from.
/// @param syncFd File descriptor for the timeline semaphore used for synchronization.
/// @param width Width of the images.
/// @param height Height of the images.
/// @param hdr Whether the images are HDR.
/// @param flow Motion flow factor.
/// @param perf Whether to enable performance mode.
///
/// @return Reference to the opened context, owned by this Instance;
///         it stays valid until closeContext() is called on it.
/// @throws backend::error on failure
///
Context& openContext(
std::pair<int, int> sourceFds,
const std::vector<int>& destFds,
int syncFd,
uint32_t width, uint32_t height,
bool hdr, float flow, bool perf
);
///
/// Schedule a new set of generated frames.
///
/// @param context Context to use.
/// @throws backend::error on failure
///
void scheduleFrames(Context& context);
///
/// Close a frame generation context
///
/// Any reference previously returned by openContext() for this
/// context must not be used afterwards.
///
/// @param context Context to close.
///
void closeContext(const Context& context);
// Non-copyable and non-movable
Instance(const Instance&) = delete;
Instance& operator=(const Instance&) = delete;
Instance(Instance&&) = delete;
Instance& operator=(Instance&&) = delete;
virtual ~Instance();
private:
std::unique_ptr<InstanceImpl> m_impl; // pimpl
std::vector<std::unique_ptr<Context>> m_contexts; // owned open contexts
};
///
/// Make all lsfg-vk instances leaking.
/// This is to workaround a bug in the Vulkan loader, which
/// makes it impossible to destroy Vulkan instances and devices.
/// After calling this, instance/device teardown is intentionally skipped.
///
void makeLeaking();
}

View file

@ -0,0 +1,152 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include <cstdint>
#include <filesystem>
#include <memory>
#include <string>
#ifdef LSFGVK_PRIV
#include <vulkan/vulkan_core.h>
#endif // LSFGVK_PRIV
namespace lsfgvk {
/// Forward declaration of implementation classes (pimpl)
namespace priv {
struct [[gnu::visibility("default")]] Instance;
struct [[gnu::visibility("default")]] Context;
}
///
/// Main entrypoint of the library
///
class [[gnu::visibility("default")]] Instance {
friend class Context;
public:
///
/// Create a lsfg-vk instance
///
/// The device identifier may be one of:
/// - Device name (e.g. "NVIDIA GeForce RTX 5080")
/// - Vendor ID + Device ID in lowercase hexadecimal (e.g. "10de:2c02")
/// - PCI bus ID with padded zeroes (e.g. "0000:01:00.0")
///
/// @param deviceId Device identifier (see above)
/// @param lsfgvkDllPath Path to the lsfg-vk DLL file
/// @param allowFP16 Whether to allow usage of fp16 shader variants
/// @throws std::runtime_error on failure
///
Instance(
const std::string& deviceId,
const std::filesystem::path& lsfgvkDllPath,
bool allowFP16
);
#ifdef LSFGVK_PRIV
// Internal accessors, only visible to translation units that define
// LSFGVK_PRIV (and thus include vulkan_core.h).
/// Get the underlying Vulkan instance handle
/// @return Vulkan instance
[[nodiscard]] VkInstance _instance() const;
/// Get the underlying Vulkan device handle
/// @return Vulkan device
[[nodiscard]] VkDevice _device() const;
#endif // LSFGVK_PRIV
// Non-copyable, non-movable
Instance(const Instance&) = delete;
Instance& operator=(const Instance&) = delete;
Instance(Instance&&) = delete;
Instance& operator=(Instance&&) = delete;
~Instance();
private:
std::unique_ptr<priv::Instance> m_priv; // pimpl
};
///
/// File descriptors exported from a context, the user must close them after use.
/// All three descriptors are owned by the caller once returned from
/// Context::exportFds().
///
struct FileDescriptors {
///
/// File descriptor for a Vulkan memory allocation containing
/// a 2D array of RGBA8 pixels with length 2 and optimal allocation.
///
/// Starting at iteration 0, the next frame for which frames should be interpolated
/// inbetween should be placed in image `iteration % 2`.
///
int sourceFd;
///
/// File descriptor for a Vulkan memory allocation containing a single RGBA8
/// image into which each generated frame will be written to.
///
int destinationFd;
///
/// File descriptor for a timeline semaphore. When scheduling frames for generation,
/// a specific value is waited for and signaled on return. It is up to the user to ensure
/// the destination image is not overwritten before it is read.
///
int syncFd;
};
/// A context for generating frames
///
/// One context handles a single swapchain-sized image stream; it must not
/// outlive the Instance it was created from.
class [[gnu::visibility("default")]] Context {
public:
///
/// Create a frame generation context
///
/// @param instance Parent instance
/// @param width Image width
/// @param height Image height
/// @param flowScale Flow estimation scale factor
/// @param performanceMode Whether to enable performance mode
/// @throws std::runtime_error on failure
///
Context(
const Instance& instance,
uint32_t width,
uint32_t height,
float flowScale,
bool performanceMode
);
///
/// Export the internal resources
///
/// @return File descriptors for internal resources (caller closes them)
/// @throws std::runtime_error on failure
///
[[nodiscard]] FileDescriptors exportFds() const;
///
/// Dispatch frame generation
///
/// Let `so - 1` be the current value of the timeline semaphore, starting at 0.
/// The user must signal `so` to start the generation of the next frame, after
/// which lsfg-vk will signal `so + 1`. The user must ensure the previously
/// generated frame is read before signaling the next one (at `so + 2` and so on).
///
/// @param total Total number of frames to generate
/// @throws std::runtime_error on failure
///
void dispatch(uint32_t total);
///
/// Wait for the device to be idle
///
void idle() const;
// Non-copyable, non-movable
Context(const Context&) = delete;
Context& operator=(const Context&) = delete;
Context(Context&&) = delete;
Context& operator=(Context&&) = delete;
~Context();
private:
std::unique_ptr<priv::Context> m_priv; // pimpl
};
}

View file

@ -1,19 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include <cstdint>
#include <filesystem>
#include <unordered_map>
#include <vector>
namespace lsfgvk::backend {
/// extract all resources from a DLL file
/// @param dll path to the DLL file
/// @return map of resource IDs to their binary data
/// @throws ls::error on various failure points
std::unordered_map<uint32_t, std::vector<uint8_t>> extractResourcesFromDLL(
const std::filesystem::path& dll);
}

View file

@ -1,171 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "shader_registry.hpp"
#include "lsfg-vk-common/helpers/errors.hpp"
#include "lsfg-vk-common/vulkan/shader.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <cstddef>
#include <cstdint>
#include <span>
#include <string>
#include <unordered_map>
#include <vector>
using namespace lsfgvk;
using namespace lsfgvk::backend;
namespace {
/// Look up the binary source for a shader.
/// The resource key is the shader id plus a base offset, plus fixed
/// offsets selecting the performance and/or fp32 variant.
/// @throws ls::error when no matching resource exists
const std::vector<uint8_t>& getShaderSource(uint32_t id, bool fp16, bool perf,
        const std::unordered_map<uint32_t, std::vector<uint8_t>>& resources) {
    constexpr size_t kBaseOffset = 49;
    constexpr size_t kPerfOffset = 23;
    constexpr size_t kFp32Offset = 49;

    size_t key = kBaseOffset + id;
    if (perf)
        key += kPerfOffset;
    if (!fp16)
        key += kFp32Offset;

    const auto entry = resources.find(key);
    if (entry == resources.end())
        throw ls::error("unable to find shader with id: " + std::to_string(id));
    return entry->second;
}
/// Patch the generate shader's SPIR-V in place:
/// replaces the StorageImageWriteWithoutFormat capability with the plain
/// Shader capability, and forces storage image type declarations to an
/// explicit format (rgba16f when hdr, rgba8 otherwise).
void patchGenerateShader(std::vector<uint8_t>& data, bool hdr) {
    const uint16_t SpvOpCapability = 17;
    const uint16_t SpvOpTypeImage = 25;
    const uint32_t SpvCapabilityStorageImageWriteWithoutFormat = 56;
    const uint32_t SpvCapabilityShader = 1;
    const uint32_t SpvImageFormatRgba16f = 2;
    const uint32_t SpvImageFormatRgba8 = 4;

    const size_t wordCount = data.size() / sizeof(uint32_t);
    // word-wise accessors via memcpy, sidestepping strict-aliasing concerns
    const auto load = [&data](size_t w) {
        uint32_t v = 0;
        std::memcpy(&v, data.data() + (w * sizeof(uint32_t)), sizeof(uint32_t));
        return v;
    };
    const auto store = [&data](size_t w, uint32_t v) {
        std::memcpy(data.data() + (w * sizeof(uint32_t)), &v, sizeof(uint32_t));
    };

    // instructions start after the 5-word SPIR-V header
    for (size_t i = 5; i < wordCount;) {
        const uint32_t insn = load(i);
        const auto wc = static_cast<uint16_t>(insn >> 16); // instruction word count
        const auto op = static_cast<uint16_t>(insn & 0xFFFF);

        // remove write without format capability
        if (op == SpvOpCapability && wc >= 2
                && load(i + 1) == SpvCapabilityStorageImageWriteWithoutFormat)
            store(i + 1, SpvCapabilityShader);

        // patch format in image instructions (Sampled == 2 means storage use)
        if (op == SpvOpTypeImage && wc >= 9 && load(i + 7) == 2)
            store(i + 8, hdr ? SpvImageFormatRgba16f : SpvImageFormatRgba8);

        i += wc ? wc : 1; // guard against a zero word count
    }
}
}
// Build the full shader registry from the extracted DLL resources.
// The quality and performance sections load the same shader ids; the PERF
// macro is re-#defined between the two sections so the SHADER table below
// selects the matching resource variant for each.
// NOTE(review): p1-p4 are forwarded verbatim to vk::Shader — presumably
// per-shader descriptor/binding counts; confirm against vk::Shader's ctor.
ShaderRegistry backend::buildShaderRegistry(const vk::Vulkan& vk, bool fp16,
const std::unordered_map<uint32_t, std::vector<uint8_t>>& resources) {
// patch the generate shader (SDR and HDR variants from the same source)
std::vector<uint8_t> generate_data = getShaderSource(256, fp16, false, resources);
std::vector<uint8_t> generate_data_hdr = generate_data;
patchGenerateShader(generate_data, false);
patchGenerateShader(generate_data_hdr, true);
// load all other shaders
#define SHADER(id, p1, p2, p3, p4) \
vk::Shader(vk, getShaderSource(id, fp16, PERF, resources), \
p1, p2, p3, p4)
return {
#define PERF false
.mipmaps = SHADER(255, 1, 7, 1, 1),
.generate = vk::Shader(vk, generate_data, 5, 1, 1, 2),
.generate_hdr = vk::Shader(vk, generate_data_hdr, 5, 1, 1, 2),
.quality = {
.alpha = {
SHADER(267, 1, 2, 0, 1),
SHADER(268, 2, 2, 0, 1),
SHADER(269, 2, 4, 0, 1),
SHADER(270, 4, 4, 0, 1)
},
.beta = {
SHADER(275, 12, 2, 0, 1),
SHADER(276, 2, 2, 0, 1),
SHADER(277, 2, 2, 0, 1),
SHADER(278, 2, 2, 0, 1),
SHADER(279, 2, 6, 1, 1)
},
.gamma = {
SHADER(257, 9, 3, 1, 2),
SHADER(259, 3, 4, 0, 1),
SHADER(260, 4, 4, 0, 1),
SHADER(261, 4, 4, 0, 1),
SHADER(262, 6, 1, 1, 2)
},
.delta = {
SHADER(257, 9, 3, 1, 2),
SHADER(263, 3, 4, 0, 1),
SHADER(264, 4, 4, 0, 1),
SHADER(265, 4, 4, 0, 1),
SHADER(266, 6, 1, 1, 2),
SHADER(258, 10, 2, 1, 2),
SHADER(271, 2, 2, 0, 1),
SHADER(272, 2, 2, 0, 1),
SHADER(273, 2, 2, 0, 1),
SHADER(274, 3, 1, 1, 2)
}
},
#undef PERF
// same shader ids, performance-mode resource variants
#define PERF true
.performance = {
.alpha = {
SHADER(267, 1, 1, 0, 1),
SHADER(268, 1, 1, 0, 1),
SHADER(269, 1, 2, 0, 1),
SHADER(270, 2, 2, 0, 1)
},
.beta = {
SHADER(275, 6, 2, 0, 1),
SHADER(276, 2, 2, 0, 1),
SHADER(277, 2, 2, 0, 1),
SHADER(278, 2, 2, 0, 1),
SHADER(279, 2, 6, 1, 1)
},
.gamma = {
SHADER(257, 5, 3, 1, 2),
SHADER(259, 3, 2, 0, 1),
SHADER(260, 2, 2, 0, 1),
SHADER(261, 2, 2, 0, 1),
SHADER(262, 4, 1, 1, 2)
},
.delta = {
SHADER(257, 5, 3, 1, 2),
SHADER(263, 3, 2, 0, 1),
SHADER(264, 2, 2, 0, 1),
SHADER(265, 2, 2, 0, 1),
SHADER(266, 4, 1, 1, 2),
SHADER(258, 6, 1, 1, 2),
SHADER(271, 1, 1, 0, 1),
SHADER(272, 1, 1, 0, 1),
SHADER(273, 1, 1, 0, 1),
SHADER(274, 2, 1, 1, 2)
}
},
#undef PERF
.is_fp16 = fp16
};
#undef SHADER
}

View file

@ -1,42 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include "lsfg-vk-common/vulkan/shader.hpp"
#include <array>
#include <cstdint>
#include <unordered_map>
#include <vector>
namespace lsfgvk::backend {
/// shader collection struct — one set of shader chains (alpha..delta)
struct Shaders {
std::array<vk::Shader, 4> alpha;
std::array<vk::Shader, 5> beta;
std::array<vk::Shader, 5> gamma;
std::array<vk::Shader, 10> delta;
};
/// shader registry struct — every compiled shader the backend uses,
/// with separate quality and performance variants of each chain
struct ShaderRegistry {
vk::Shader mipmaps;
vk::Shader generate, generate_hdr; //!< SDR and HDR variants of the generate pass
Shaders quality;
Shaders performance;
bool is_fp16; //!< whether the fp16 shader variants were loaded
};
/// build a shader registry from resources
/// @param vk Vulkan instance
/// @param fp16 whether to load fp16 variants
/// @param resources map of resource IDs to their binary data
/// @return constructed shader registry
/// @throws ls::error if shaders are missing
/// @throws vk::vulkan_error on Vulkan errors
ShaderRegistry buildShaderRegistry(const vk::Vulkan& vk, bool fp16,
const std::unordered_map<uint32_t, std::vector<uint8_t>>& resources);
}

View file

@ -1,56 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "limits.hpp"
#include "lsfg-vk-common/vulkan/descriptor_pool.hpp"
#include <cstddef>
#include <cstdint>
using namespace lsfgvk;
using namespace lsfgvk::backend;
namespace {
// Descriptor counts for the base (per-context) pipeline in quality mode.
// NOTE(review): values appear hand-counted from the shader chains —
// re-derive them if the shader tables change.
const vk::Limits BASE_LIMITS{
.sets = 51,
.uniform_buffers = 3,
.samplers = 51,
.sampled_images = 165,
.storage_images = 172
};
// Performance mode substitutes only the image counts of BASE_LIMITS.
const vk::Limits BASE_LIMITS_PERF{
.sampled_images = 91,
.storage_images = 102
};
// Additional descriptor counts added once per generated image.
const vk::Limits GEN_LIMITS{
.sets = 93,
.uniform_buffers = 54,
.samplers = 147,
.sampled_images = 567,
.storage_images = 261
};
// Performance mode substitutes only the image counts of GEN_LIMITS.
const vk::Limits GEN_LIMITS_PERF{
.sampled_images = 339,
.storage_images = 183
};
}
/// Compute descriptor pool limits for `count` generated images:
/// base pipeline limits plus the per-image generation limits `count`
/// times; performance mode swaps in smaller image counts.
vk::Limits backend::calculateDescriptorPoolLimits(size_t count, bool perf) {
    const auto images = static_cast<uint16_t>(count);

    vk::Limits base{BASE_LIMITS};
    vk::Limits gen{GEN_LIMITS};
    if (perf) {
        base.sampled_images = BASE_LIMITS_PERF.sampled_images;
        base.storage_images = BASE_LIMITS_PERF.storage_images;
        gen.sampled_images = GEN_LIMITS_PERF.sampled_images;
        gen.storage_images = GEN_LIMITS_PERF.storage_images;
    }

    base.sets += gen.sets * images;
    base.uniform_buffers += gen.uniform_buffers * images;
    base.samplers += gen.samplers * images;
    base.sampled_images += gen.sampled_images * images;
    base.storage_images += gen.storage_images * images;
    return base;
}

View file

@ -1,15 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include "lsfg-vk-common/vulkan/descriptor_pool.hpp"
#include <cstddef>
namespace lsfgvk::backend {
/// calculate limits for descriptor pools
/// @param count number of images
/// @param perf whether performance mode is enabled (uses smaller image counts)
/// @return calculated limits
vk::Limits calculateDescriptorPoolLimits(size_t count, bool perf);
}

View file

@ -1,128 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "managed_shader.hpp"
#include "lsfg-vk-common/vulkan/buffer.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/descriptor_pool.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/sampler.hpp"
#include "lsfg-vk-common/vulkan/shader.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <cstddef>
#include <functional>
#include <utility>
#include <vector>
#include <vulkan/vulkan_core.h>
using namespace lsfgvk;
using namespace lsfgvk::backend;
/// Queue a single image to be bound as a sampled image.
ManagedShaderBuilder& ManagedShaderBuilder::sampled(const vk::Image& image) {
    this->sampledImages.push_back(std::cref(image));
    return *this;
}
/// Queue a contiguous range of images as sampled images.
///
/// @param images source list of images
/// @param offset index of the first image to add
/// @param count number of images to add (0 = all remaining)
///
/// A count of 0 (or one that would run past the end) is clamped to the
/// remainder of the list; an offset at or past the end adds nothing.
/// (Previously `images.size() - offset` underflowed size_t for an
/// out-of-range offset, producing a huge count.)
ManagedShaderBuilder& ManagedShaderBuilder::sampleds(
        const std::vector<vk::Image>& images,
        size_t offset, size_t count) {
    if (offset >= images.size())
        return *this; // nothing to add; also guards the subtraction below
    if (count == 0 || count > images.size() - offset)
        count = images.size() - offset;
    for (size_t i = 0; i < count; ++i)
        this->sampledImages.push_back(std::ref(images.at(offset + i)));
    return *this;
}
/// Queue a single image to be bound as a storage image.
ManagedShaderBuilder& ManagedShaderBuilder::storage(const vk::Image& image) {
    this->storageImages.push_back(std::cref(image));
    return *this;
}
/// Queue a contiguous range of images as storage images.
///
/// @param images source list of images
/// @param offset index of the first image to add
/// @param count number of images to add (0 = all remaining)
///
/// A count of 0 (or one that would run past the end) is clamped to the
/// remainder of the list; an offset at or past the end adds nothing.
/// (Previously `images.size() - offset` underflowed size_t for an
/// out-of-range offset, producing a huge count.)
ManagedShaderBuilder& ManagedShaderBuilder::storages(
        const std::vector<vk::Image>& images,
        size_t offset, size_t count) {
    if (offset >= images.size())
        return *this; // nothing to add; also guards the subtraction below
    if (count == 0 || count > images.size() - offset)
        count = images.size() - offset;
    for (size_t i = 0; i < count; ++i)
        this->storageImages.push_back(std::ref(images.at(offset + i)));
    return *this;
}
/// Queue a single sampler for binding.
ManagedShaderBuilder& ManagedShaderBuilder::sampler(const vk::Sampler& sampler) {
    this->imageSamplers.push_back(std::cref(sampler));
    return *this;
}
/// Queue every sampler in the given list for binding, in order.
ManagedShaderBuilder& ManagedShaderBuilder::samplers(
        const std::vector<vk::Sampler>& samplers) {
    for (const auto& entry : samplers)
        this->imageSamplers.push_back(std::cref(entry));
    return *this;
}
/// Queue a single constant buffer for binding.
ManagedShaderBuilder& ManagedShaderBuilder::buffer(const vk::Buffer& buffer) {
    this->constantBuffers.push_back(std::cref(buffer));
    return *this;
}
/// Assemble the ManagedShader: one GENERAL->GENERAL barrier per bound
/// image (sampled images first, then storage images — only the access
/// masks differ) plus a descriptor set covering all queued resources.
ManagedShader ManagedShaderBuilder::build(const vk::Vulkan& vk,
        const vk::DescriptorPool& pool, const vk::Shader& shader) const {
    const auto makeBarrier = [](VkImage handle,
            VkAccessFlags src, VkAccessFlags dst) -> vk::Barrier {
        return {
            .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
            .srcAccessMask = src,
            .dstAccessMask = dst,
            .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
            .newLayout = VK_IMAGE_LAYOUT_GENERAL,
            .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
            .image = handle,
            .subresourceRange = {
                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                .levelCount = 1,
                .layerCount = 1
            }
        };
    };

    std::vector<vk::Barrier> barriers;
    barriers.reserve(this->sampledImages.size() + this->storageImages.size());
    // sampled: previous writes must be visible before shader reads
    for (const auto& img : this->sampledImages)
        barriers.push_back(makeBarrier(img.get().handle(),
            VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT));
    // storage: previous reads must finish before shader writes
    for (const auto& img : this->storageImages)
        barriers.push_back(makeBarrier(img.get().handle(),
            VK_ACCESS_SHADER_READ_BIT, VK_ACCESS_SHADER_WRITE_BIT));

    return {
        std::ref(shader),
        std::move(barriers),
        vk::DescriptorSet(vk, pool, shader,
            this->sampledImages,
            this->storageImages,
            this->imageSamplers,
            this->constantBuffers)
    };
}
/// Record this shader's dispatch (including its image barriers) into cmd.
void ManagedShader::dispatch(const vk::Vulkan& vk, const vk::CommandBuffer& cmd,
        VkExtent2D extent) const {
    cmd.dispatch(vk, this->shader, this->descriptorSet, this->barriers,
        extent.width, extent.height, 1);
}

View file

@ -1,98 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include "lsfg-vk-common/helpers/pointers.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/descriptor_pool.hpp"
#include "lsfg-vk-common/vulkan/descriptor_set.hpp"
#include "lsfg-vk-common/vulkan/shader.hpp"
#include <cstddef>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace lsfgvk::backend {
/// managed shader handling dispatch and barriers
/// this class is NOT memory-safe: it holds references (ls::R) to the
/// shader and resources; they must outlive this object
class ManagedShader {
friend class ManagedShaderBuilder;
public:
/// dispatch the managed shader
/// @param vk the vulkan instance
/// @param cmd command buffer to use
/// @param extent dispatch size
/// @throws ls::vulkan_error on failure
void dispatch(const vk::Vulkan& vk,
const vk::CommandBuffer& cmd, VkExtent2D extent) const;
private:
ls::R<const vk::Shader> shader;
std::vector<vk::Barrier> barriers;
vk::DescriptorSet descriptorSet;
// simple move constructor (only ManagedShaderBuilder::build creates these)
ManagedShader(ls::R<const vk::Shader> shader,
std::vector<vk::Barrier> barriers,
vk::DescriptorSet descriptorSet) :
shader(shader),
barriers(std::move(barriers)),
descriptorSet(std::move(descriptorSet)) {
}
};
/// class for building managed shaders
/// this class is NOT memory-safe: added images/samplers/buffers are held
/// by reference and must stay alive until build() and beyond
class ManagedShaderBuilder {
public:
/// default constructor
ManagedShaderBuilder() = default;
/// add a sampled image
/// @param image image to add
[[nodiscard]] ManagedShaderBuilder& sampled(const vk::Image& image);
/// add multiple sampled images
/// @param images images to add
/// @param offset offset into images
/// @param count number of images to add (0 = all)
[[nodiscard]] ManagedShaderBuilder& sampleds(const std::vector<vk::Image>& images,
size_t offset = 0, size_t count = 0);
/// add a storage image
/// @param image image to add
[[nodiscard]] ManagedShaderBuilder& storage(const vk::Image& image);
/// add multiple storage images
/// @param images images to add
/// @param offset offset into images
/// @param count number of images to add (0 = all)
[[nodiscard]] ManagedShaderBuilder& storages(const std::vector<vk::Image>& images,
size_t offset = 0, size_t count = 0);
/// add a sampler
/// @param sampler sampler to add
[[nodiscard]] ManagedShaderBuilder& sampler(const vk::Sampler& sampler);
/// add multiple samplers
/// @param samplers samplers to add
[[nodiscard]] ManagedShaderBuilder& samplers(const std::vector<vk::Sampler>& samplers);
/// add a buffer
/// @param buffer buffer to add
[[nodiscard]] ManagedShaderBuilder& buffer(const vk::Buffer& buffer);
/// build the managed shader
/// @param vk the vulkan instance
/// @param pool the descriptor pool to use
/// @param shader the shader to use
/// @returns the built managed shader
[[nodiscard]] ManagedShader build(const vk::Vulkan& vk,
const vk::DescriptorPool& pool, const vk::Shader& shader) const;
private:
std::vector<ls::R<const vk::Image>> sampledImages;
std::vector<ls::R<const vk::Image>> storageImages;
std::vector<ls::R<const vk::Sampler>> imageSamplers;
std::vector<ls::R<const vk::Buffer>> constantBuffers;
};
}

View file

@ -1,50 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "utils.hpp"
#include <array>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vulkan/vulkan_core.h>
using namespace lsfgvk;
using namespace lsfgvk::backend;
/// Build a constant buffer prefilled with defaults for one interpolated frame.
/// @param index timestamp index of the frame within the interval
/// @param total total number of generated frames in the interval
/// @param hdr whether HDR output is active
/// @param invFlow inverted flow scale value
ConstantBuffer backend::getDefaultConstantBuffer(
        size_t index, size_t total,
        bool hdr, float invFlow) {
    ConstantBuffer buf{}; // all unlisted fields stay zero
    buf.advancedColorKind = hdr ? 2U : 0U;
    buf.hdrSupport = hdr ? 1U : 0U;
    buf.resolutionInvScale = invFlow;
    // evenly spaced timestamps in (0, 1): frame i of n sits at (i+1)/(n+1)
    buf.timestamp = static_cast<float>(index + 1) / static_cast<float>(total + 1);
    buf.uiThreshold = 0.5F;
    return buf;
}
/// Shift both dimensions of an extent right by i (divide by 2^i, rounding down).
VkExtent2D backend::shift_extent(VkExtent2D extent, uint32_t i) {
    const uint32_t w = extent.width >> i;
    const uint32_t h = extent.height >> i;
    return {.width = w, .height = h};
}
/// Add a to both dimensions, then shift right by i
/// (e.g. a = (1 &lt;&lt; i) - 1 yields a round-up divide by 2^i).
VkExtent2D backend::add_shift_extent(VkExtent2D extent, uint32_t a, uint32_t i) {
    const uint32_t w = (extent.width + a) >> i;
    const uint32_t h = (extent.height + a) >> i;
    return {.width = w, .height = h};
}
/// Format the low 16 bits of id as an uppercase hex string ("0xABCD"),
/// matching the vendor/device ID format used in the config.
std::string backend::to_hex_id(uint32_t id) {
    constexpr std::array<char, 17> digits{"0123456789ABCDEF"};
    std::string result = "0x";
    for (int shift = 12; shift >= 0; shift -= 4)
        result += digits.at((id >> shift) & 0xFU);
    return result;
}

View file

@ -1,82 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include "../extraction/shader_registry.hpp"
#include "lsfg-vk-common/helpers/pointers.hpp"
#include "lsfg-vk-common/vulkan/buffer.hpp"
#include "lsfg-vk-common/vulkan/descriptor_pool.hpp"
#include "lsfg-vk-common/vulkan/sampler.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <array>
#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace lsfgvk::backend {
/// exposed context data
struct Ctx {
ls::R<const vk::Vulkan> vk; // safe back reference
ls::R<const ShaderRegistry> shaders; // safe back reference
vk::DescriptorPool pool;
vk::Buffer constantBuffer; // single default constant buffer
vk::Buffer constantBuffers; // NOTE: see declaration below
std::vector<vk::Buffer> constantBuffers; // one constant buffer per generated frame
vk::Sampler bnbSampler; //!< border, no compare, black
vk::Sampler bnwSampler; //!< border, no compare, white
vk::Sampler eabSampler; //!< edge, always compare, black
VkExtent2D sourceExtent; // extent of the source/destination images
VkExtent2D flowExtent; // extent of the optical-flow images
bool hdr; // whether HDR formats are in use
float flow; // flow scale factor
bool perf; // performance mode enabled
size_t count; // number of generated frames
};
/// constant buffer used in shaders
/// NOTE(review): field order and trailing padding presumably mirror the
/// shader-side constant buffer layout — keep them in sync when editing.
struct ConstantBuffer {
std::array<uint32_t, 2> inputOffset;
uint32_t firstIter;
uint32_t firstIterS;
uint32_t advancedColorKind;
uint32_t hdrSupport;
float resolutionInvScale;
float timestamp;
float uiThreshold;
std::array<uint32_t, 3> pad; // pads the struct to 48 bytes (16-byte multiple)
};
/// get a prefilled constant buffer
/// @param index timestamp index
/// @param total total amount of images
/// @param hdr whether HDR is enabled
/// @param invFlow inverted flow scale value
/// @return prefilled constant buffer
ConstantBuffer getDefaultConstantBuffer(
size_t index, size_t total,
bool hdr, float invFlow
);
/// round down a VkExtent2D
/// @param extent the extent to shift
/// @param i the amount to shift by
/// @return the shifted extent
VkExtent2D shift_extent(VkExtent2D extent, uint32_t i);
/// round up a VkExtent2D
/// @param extent the extent to shift
/// @param a the amount to add before shifting
/// @param i the amount to shift by
/// @return the shifted extent
VkExtent2D add_shift_extent(VkExtent2D extent, uint32_t a, uint32_t i);
/// convert a device/vendor id into a hex string
/// @param id the id to convert (only the low 16 bits are formatted)
/// @return fixed-width hex string of the form "0xABCD"
std::string to_hex_id(uint32_t id);
}

View file

@ -1,666 +1,241 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "lsfg-vk-backend/lsfgvk.hpp"
#include "extraction/dll_reader.hpp"
#include "extraction/shader_registry.hpp"
#include "helpers/limits.hpp"
#include "helpers/utils.hpp"
#include "lsfg-vk-common/helpers/errors.hpp"
#include "lsfg-vk-common/helpers/pointers.hpp"
#include "lsfg-vk-common/vulkan/buffer.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/fence.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/timeline_semaphore.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include "shaderchains/alpha0.hpp"
#include "shaderchains/alpha1.hpp"
#include "shaderchains/beta0.hpp"
#include "shaderchains/beta1.hpp"
#include "shaderchains/delta0.hpp"
#include "shaderchains/delta1.hpp"
#include "shaderchains/gamma0.hpp"
#include "shaderchains/gamma1.hpp"
#include "shaderchains/generate.hpp"
#include "shaderchains/mipmaps.hpp"
#include "lsfgvk.hpp"
#include "modules/library.hpp"
#include "modules/pipeline.hpp"
#include "utility/pipelines.hpp"
#include "utility/vkhelper.hpp"
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <exception>
#include <filesystem>
#include <functional>
#include <iostream>
#include <memory>
#include <optional>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include <vulkan/vulkan_core.h>
#ifdef LSFGVK_TESTING_RENDERDOC
#include <renderdoc_app.h>
#include <dlfcn.h>
#endif
using namespace lsfgvk;
using namespace lsfgvk::backend;
namespace lsfgvk::backend {
error::error(const std::string& msg, const std::exception& inner)
: std::runtime_error(msg + "\n- " + inner.what()) {}
error::error(const std::string& msg)
: std::runtime_error(msg) {}
error::~error() = default;
/// instance class
class InstanceImpl {
public:
/// create an instance
/// (see lsfg-vk documentation)
InstanceImpl(vk::PhysicalDeviceSelector selectPhysicalDevice,
const std::filesystem::path& shaderDllPath,
bool allowLowPrecision);
/// get the Vulkan instance
/// @return the Vulkan instance
[[nodiscard]] const auto& getVulkan() const { return this->vk; }
/// get the shader registry
/// @return the shader registry
[[nodiscard]] const auto& getShaderRegistry() const { return this->shaders; }
#ifdef LSFGVK_TESTING_RENDERDOC
/// get the RenderDoc API
/// @return the RenderDoc API
[[nodiscard]] const auto& getRenderDocAPI() const { return this->renderdoc; }
#endif
// Movable, non-copyable, custom destructor
InstanceImpl(const InstanceImpl&) = delete;
InstanceImpl& operator=(const InstanceImpl&) = delete;
InstanceImpl(InstanceImpl&&) = default;
InstanceImpl& operator=(InstanceImpl&&) = default;
~InstanceImpl();
private:
vk::Vulkan vk;
ShaderRegistry shaders;
#ifdef LSFGVK_TESTING_RENDERDOC
std::optional<RENDERDOC_API_1_6_0> renderdoc;
#endif
};
/// context class
class ContextImpl {
public:
/// create a context
/// (see lsfg-vk documentation)
ContextImpl(const InstanceImpl& instance,
std::pair<int, int> sourceFds, const std::vector<int>& destFds, int syncFd,
VkExtent2D extent, bool hdr, float flow, bool perf);
/// schedule frames
/// (see lsfg-vk documentation)
void scheduleFrames();
private:
std::pair<vk::Image, vk::Image> sourceImages;
std::vector<vk::Image> destImages;
vk::Image blackImage;
vk::TimelineSemaphore syncSemaphore; // imported
vk::TimelineSemaphore prepassSemaphore;
size_t idx{1};
size_t fidx{0}; // real frame index
std::vector<vk::CommandBuffer> cmdbufs;
vk::Fence cmdbufFence;
Ctx ctx;
Mipmaps mipmaps;
std::array<Alpha0, 7> alpha0;
std::array<Alpha1, 7> alpha1;
Beta0 beta0;
Beta1 beta1;
struct Pass {
std::vector<Gamma0> gamma0;
std::vector<Gamma1> gamma1;
std::vector<Delta0> delta0;
std::vector<Delta1> delta1;
ls::lazy<Generate> generate;
};
std::vector<Pass> passes;
};
}
Instance::Instance(
const DevicePicker& devicePicker,
const std::filesystem::path& shaderDllPath,
bool allowLowPrecision) {
const auto selectFunc = [&devicePicker](const vk::VulkanInstanceFuncs funcs,
const std::vector<VkPhysicalDevice>& devices) {
for (const auto& device : devices) {
// check if the physical device supports VK_EXT_pci_bus_info
uint32_t ext_count{};
funcs.EnumerateDeviceExtensionProperties(device, nullptr, &ext_count, VK_NULL_HANDLE);
const std::string& deviceId,
const std::filesystem::path& lsfgvkDllPath,
bool allowFP16
) {
// Create Vulkan context
auto dld{std::make_unique<vk::detail::DispatchLoaderDynamic>()};
std::vector<VkExtensionProperties> extensions(ext_count);
funcs.EnumerateDeviceExtensionProperties(device, nullptr, &ext_count, extensions.data());
auto instance{vkhelper::createInstance(*dld)};
auto physdev{vkhelper::findPhysicalDevice(*dld, *instance, deviceId)};
const bool has_pci_ext = std::ranges::find_if(extensions,
[](const VkExtensionProperties& ext) {
return std::string(std::to_array(ext.extensionName).data())
== VK_EXT_PCI_BUS_INFO_EXTENSION_NAME;
}) != extensions.end();
const uint32_t qfi{vkhelper::findComputeQueueFamilyIndex(*dld, physdev)};
const bool fp16{allowFP16 && vkhelper::checkHalfPrecisionSupport(*dld, physdev)};
// then fetch all available properties
VkPhysicalDevicePCIBusInfoPropertiesEXT pciInfo{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT
};
VkPhysicalDeviceProperties2 props{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
.pNext = has_pci_ext ? &pciInfo : nullptr
};
funcs.GetPhysicalDeviceProperties2(device, &props);
auto [device, queue] = vkhelper::createDevice(*dld, physdev, qfi, fp16);
std::array<char, 256> devname = std::to_array(props.properties.deviceName);
devname.at(255) = '\0'; // ensure null-termination
if (devicePicker(
std::string(devname.data()),
{ backend::to_hex_id(props.properties.vendorID),
backend::to_hex_id(props.properties.deviceID) },
has_pci_ext ? std::optional<std::string>{
std::to_string(pciInfo.pciBus) + ":" +
std::to_string(pciInfo.pciDevice) + "." +
std::to_string(pciInfo.pciFunction)
} : std::nullopt
))
return device;
}
throw ls::vulkan_error("no suitable physical device found");
// Construct instance
library::ShaderLibrary library{
*dld,
*device,
fp16,
lsfgvkDllPath
};
this->m_impl = std::make_unique<InstanceImpl>(
selectFunc, shaderDllPath, allowLowPrecision
this->m_priv = std::make_unique<priv::Instance>(priv::Instance {
.vk = {
.dld = std::move(dld),
.instance = std::move(instance),
.physdev = physdev,
.device = std::move(device),
.queue = queue,
.qfi = qfi,
.fp16 = fp16
},
.shaderLibrary = std::move(library)
});
}
/// Create a frame-generation context for a fixed resolution.
/// @param instance parent instance whose Vulkan state and shader library are reused
/// @param width source image width in pixels
/// @param height source image height in pixels
/// @param flowScale scale factor applied to the optical-flow resolution
/// @param performanceMode whether to build the performance shader variants
Context::Context(
const Instance& instance,
uint32_t width,
uint32_t height,
float flowScale,
bool performanceMode
) {
const auto& vk{instance.m_priv->vk};
// Build the full compute pipeline up front for this resolution.
pipeline::Pipeline pipeline{
*vk.dld,
*vk.device,
vk.physdev,
vk.queue,
vk.qfi,
instance.m_priv->shaderLibrary,
lsfgvk::getPipelineSignature(performanceMode),
{ width, height },
flowScale,
performanceMode,
false
};
// NOTE: field order must match priv::Context's declaration order for
// this designated initializer to compile.
this->m_priv = std::make_unique<priv::Context>(priv::Context {
.instance = std::ref(*instance.m_priv),
.pipeline = std::move(pipeline),
.syncSemaphore = { vkhelper::createTimelineSemaphore(*vk.dld, *vk.device, true), 0 },
.internalSemaphores = { vkhelper::createTimelineSemaphore(*vk.dld, *vk.device), 0 },
.fence = vkhelper::createFence(*vk.dld, *vk.device),
});
}
/// Export this context's shared resources as file descriptors so the caller
/// can import them into its own graphics context: the memory backing the
/// first external input image, the memory backing the first external output
/// image, and the shared timeline semaphore.
FileDescriptors Context::exportFds() const {
const auto& vk{this->m_priv->instance.get().vk};
const auto& pipeline{this->m_priv->pipeline};
return{
.sourceFd = vkhelper::exportMemoryFd(
*vk.dld, *vk.device,
pipeline.getExternalInputs().front().memory
),
.destinationFd = vkhelper::exportMemoryFd(
*vk.dld, *vk.device,
pipeline.getExternalOutputs().front().memory
),
.syncFd = vkhelper::exportSemaphoreFd(
*vk.dld, *vk.device,
*this->m_priv->syncSemaphore.first
)
};
}
void Context::dispatch(uint32_t total) {
auto& ctx{*this->m_priv};
const auto& vk{ctx.instance.get().vk};
// Increment iteration counter after previous frame is completed
auto* mapped{ctx.pipeline.getMappedBuffer()};
if (ctx.firstIteration) {
ctx.firstIteration = false;
mapped->iteration = 0;
} else {
if (vk.device->waitForFences(*ctx.fence, true, UINT64_MAX, *vk.dld) != vk::Result::eSuccess)
throw std::runtime_error("Unable to wait for completion of previous iteration");
vk.device->resetFences(*ctx.fence, *vk.dld);
mapped->iteration++;
}
const auto& cmdbufs{ctx.pipeline.getCmdbufs()};
// Dispatch pre-pass
auto& sync{ctx.syncSemaphore};
sync.second++;
auto& internal{ctx.internalSemaphores};
internal.second++;
vk::TimelineSemaphoreSubmitInfo timelineInfo{
.waitSemaphoreValueCount = 1,
.pWaitSemaphoreValues = &sync.second,
.signalSemaphoreValueCount = 1,
.pSignalSemaphoreValues = &internal.second
};
const vk::PipelineStageFlags waitStage{vk::PipelineStageFlagBits::eTopOfPipe};
vk.queue.submit(
{{
.pNext = &timelineInfo,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &*sync.first,
.pWaitDstStageMask = &waitStage,
.commandBufferCount = 1U,
.pCommandBuffers = &*cmdbufs.at(0),
.signalSemaphoreCount = 1,
.pSignalSemaphores = &*internal.first
}},
nullptr,
*vk.dld
);
}
namespace {
/// find the cache file path
std::filesystem::path findCacheFilePath() {
const char* xdgCacheHome = std::getenv("XDG_CACHE_HOME");
if (xdgCacheHome && *xdgCacheHome != '\0')
return std::filesystem::path(xdgCacheHome) / "lsfg-vk_pipeline_cache.bin";
// Dispatch main passes
uint64_t prevInternal{};
for (uint32_t i = 0; i < total; i++) {
const auto& transCmdbuf{ctx.pipeline.buildTransCmdbuf(
*vk.dld, *vk.device,
mapped->iteration,
i, total
)};
const char* home = std::getenv("HOME");
if (home && *home != '\0')
return std::filesystem::path(home) / ".cache" / "lsfg-vk_pipeline_cache.bin";
return{"/tmp/lsfg-vk_pipeline_cache.bin"};
}
/// create a Vulkan instance
vk::Vulkan createVulkanInstance(vk::PhysicalDeviceSelector selectPhysicalDevice) {
try {
return{
"lsfg-vk", vk::version{2, 0, 0},
"lsfg-vk-engine", vk::version{2, 0, 0},
selectPhysicalDevice,
false, std::nullopt,
findCacheFilePath()
};
} catch (const std::exception& e) {
throw backend::error("Unable to initialize Vulkan", e);
}
}
/// build a shader registry
ShaderRegistry createShaderRegistry(vk::Vulkan& vk,
const std::filesystem::path& shaderDllPath,
bool allowLowPrecision) {
std::unordered_map<uint32_t, std::vector<uint8_t>> resources{};
try {
resources = backend::extractResourcesFromDLL(shaderDllPath);
} catch (const std::exception& e) {
throw backend::error("Unable to parse Lossless Scaling DLL", e);
// Transition command buffer to next timestamp
if (i == 0) {
prevInternal = internal.second;
timelineInfo.pWaitSemaphoreValues = &prevInternal;
} else {
sync.second++;
timelineInfo.pWaitSemaphoreValues = &sync.second;
}
try {
return backend::buildShaderRegistry(
vk, allowLowPrecision && vk.supportsFP16(),
resources
);
} catch (const std::exception& e) {
throw backend::error("Unable to build shader registry", e);
}
}
#ifdef LSFGVK_TESTING_RENDERDOC
/// load RenderDoc integration
std::optional<RENDERDOC_API_1_6_0> loadRenderDocIntegration() {
void* module = dlopen("librenderdoc.so", RTLD_NOW | RTLD_NOLOAD);
if (!module)
return std::nullopt;
internal.second++;
timelineInfo.pSignalSemaphoreValues = &internal.second;
auto renderdocGetAPI = reinterpret_cast<pRENDERDOC_GetAPI>(
dlsym(module, "RENDERDOC_GetAPI"));
if (!renderdocGetAPI)
return std::nullopt;
vk.queue.submit(
{{
.pNext = &timelineInfo,
.waitSemaphoreCount = 1,
.pWaitSemaphores = i == 0 ? &*internal.first : &*sync.first,
.pWaitDstStageMask = &waitStage,
.commandBufferCount = 1,
.pCommandBuffers = &transCmdbuf,
.signalSemaphoreCount = 1,
.pSignalSemaphores = &*internal.first
}},
nullptr,
*vk.dld
);
RENDERDOC_API_1_6_0* api{};
renderdocGetAPI(eRENDERDOC_API_Version_1_6_0, reinterpret_cast<void**>(&api));
if (!api)
return std::nullopt;
// Dispatch main pass
timelineInfo.pWaitSemaphoreValues = &internal.second;
return *api;
}
#endif
}
sync.second++;
timelineInfo.pSignalSemaphoreValues = &sync.second;
InstanceImpl::InstanceImpl(vk::PhysicalDeviceSelector selectPhysicalDevice,
const std::filesystem::path& shaderDllPath,
bool allowLowPrecision)
: vk(createVulkanInstance(selectPhysicalDevice)),
shaders(createShaderRegistry(this->vk, shaderDllPath,
allowLowPrecision && vk.supportsFP16())) {
#ifdef LSFGVK_TESTING_RENDERDOC
this->renderdoc = loadRenderDocIntegration();
#endif
vk.persistPipelineCache(); // will silently fail
}
Context& Instance::openContext(std::pair<int, int> sourceFds, const std::vector<int>& destFds,
int syncFd, uint32_t width, uint32_t height,
bool hdr, float flow, bool perf) {
const VkExtent2D extent{ width, height };
return *this->m_contexts.emplace_back(std::make_unique<ContextImpl>(*this->m_impl,
sourceFds, destFds, syncFd,
extent, hdr, flow, perf
)).get();
}
namespace {
/// import source images
std::pair<vk::Image, vk::Image> importImages(const vk::Vulkan& vk,
const std::pair<int, int>& sourceFds,
VkExtent2D extent, VkFormat format) {
try {
return {
vk::Image(vk, extent, format,
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, sourceFds.first),
vk::Image(vk, extent, format,
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, sourceFds.second)
};
} catch (const std::exception& e) {
throw backend::error("Unable to import destination images", e);
}
}
/// import destination images
std::vector<vk::Image> importImages(const vk::Vulkan& vk,
const std::vector<int>& destFds,
VkExtent2D extent, VkFormat format) {
try {
std::vector<vk::Image> destImages;
destImages.reserve(destFds.size());
for (const auto& fd : destFds)
destImages.emplace_back(vk, extent, format,
VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, fd);
return destImages;
} catch (const std::exception& e) {
throw backend::error("Unable to import destination images", e);
}
}
/// create a black image
vk::Image createBlackImage(const vk::Vulkan& vk) {
try {
return{vk,
{ .width = 4, .height = 4 }
};
} catch (const std::exception& e) {
throw backend::error("Unable to create black image", e);
}
}
/// import timeline semaphore
vk::TimelineSemaphore importTimelineSemaphore(const vk::Vulkan& vk, int syncFd) {
try {
return{vk, 0, syncFd};
} catch (const std::exception& e) {
throw backend::error("Unable to import timeline semaphore", e);
}
}
/// create prepass semaphores
vk::TimelineSemaphore createPrepassSemaphore(const vk::Vulkan& vk) {
try {
return{vk, 0};
} catch (const std::exception& e) {
throw backend::error("Unable to create prepass semaphore", e);
}
}
/// create command buffers
std::vector<vk::CommandBuffer> createCommandBuffers(const vk::Vulkan& vk, size_t count) {
try {
std::vector<vk::CommandBuffer> cmdbufs;
cmdbufs.reserve(count);
for (size_t i = 0; i < count; ++i)
cmdbufs.emplace_back(vk);
return cmdbufs;
} catch (const std::exception& e) {
throw backend::error("Unable to create command buffers", e);
}
}
/// create context data
Ctx createCtx(const InstanceImpl& instance, VkExtent2D extent,
bool hdr, float flow, bool perf, size_t count) {
const auto& vk = instance.getVulkan();
const auto& shaders = instance.getShaderRegistry();
try {
std::vector<vk::Buffer> constantBuffers{};
constantBuffers.reserve(count);
for (size_t i = 0; i < count; ++i)
constantBuffers.emplace_back(vk,
backend::getDefaultConstantBuffer(
i, count,
hdr, flow
)
);
return {
.vk = std::ref(vk),
.shaders = std::ref(shaders),
.pool{vk, backend::calculateDescriptorPoolLimits(count, perf)},
.constantBuffer{vk, backend::getDefaultConstantBuffer(0, 1, hdr, flow)},
.constantBuffers{std::move(constantBuffers)},
.bnbSampler{vk, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, VK_COMPARE_OP_NEVER, false},
.bnwSampler{vk, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, VK_COMPARE_OP_NEVER, true},
.eabSampler{vk, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_COMPARE_OP_ALWAYS, false},
.sourceExtent = extent,
.flowExtent = VkExtent2D {
.width = static_cast<uint32_t>(static_cast<float>(extent.width) / flow),
.height = static_cast<uint32_t>(static_cast<float>(extent.height) / flow)
},
.hdr = hdr,
.flow = flow,
.perf = perf,
.count = count
};
} catch (const std::exception& e) {
throw backend::error("Unable to create context", e);
}
}
}
ContextImpl::ContextImpl(const InstanceImpl& instance,
std::pair<int, int> sourceFds, const std::vector<int>& destFds, int syncFd,
VkExtent2D extent, bool hdr, float flow, bool perf) :
sourceImages(importImages(instance.getVulkan(), sourceFds,
extent, hdr ? VK_FORMAT_R16G16B16A16_SFLOAT : VK_FORMAT_R8G8B8A8_UNORM)),
destImages(importImages(instance.getVulkan(), destFds,
extent, hdr ? VK_FORMAT_R16G16B16A16_SFLOAT : VK_FORMAT_R8G8B8A8_UNORM)),
blackImage(createBlackImage(instance.getVulkan())),
syncSemaphore(importTimelineSemaphore(instance.getVulkan(), syncFd)),
prepassSemaphore(createPrepassSemaphore(instance.getVulkan())),
cmdbufs(createCommandBuffers(instance.getVulkan(), destFds.size() + 1)),
cmdbufFence(instance.getVulkan()),
ctx(createCtx(instance, extent, hdr, flow, perf, destFds.size())),
mipmaps(ctx, sourceImages),
alpha0{
Alpha0(ctx, mipmaps.getImages().at(0)),
Alpha0(ctx, mipmaps.getImages().at(1)),
Alpha0(ctx, mipmaps.getImages().at(2)),
Alpha0(ctx, mipmaps.getImages().at(3)),
Alpha0(ctx, mipmaps.getImages().at(4)),
Alpha0(ctx, mipmaps.getImages().at(5)),
Alpha0(ctx, mipmaps.getImages().at(6))
},
alpha1{
Alpha1(ctx, 3, alpha0.at(0).getImages()),
Alpha1(ctx, 2, alpha0.at(1).getImages()),
Alpha1(ctx, 2, alpha0.at(2).getImages()),
Alpha1(ctx, 2, alpha0.at(3).getImages()),
Alpha1(ctx, 2, alpha0.at(4).getImages()),
Alpha1(ctx, 2, alpha0.at(5).getImages()),
Alpha1(ctx, 2, alpha0.at(6).getImages())
},
beta0(ctx, alpha1.at(0).getImages()),
beta1(ctx, beta0.getImages()) {
// build main passes
for (size_t i = 0; i < destImages.size(); ++i) {
auto& pass = this->passes.emplace_back();
pass.gamma0.reserve(7);
pass.gamma1.reserve(7);
pass.delta0.reserve(3);
pass.delta1.reserve(3);
for (size_t j = 0; j < 7; j++) {
if (j == 0) { // first pass has no prior data
pass.gamma0.emplace_back(ctx, i,
this->alpha1.at(6 - j).getImages(),
this->blackImage
);
pass.gamma1.emplace_back(ctx, i,
pass.gamma0.at(j).getImages(),
this->blackImage,
this->beta1.getImages().at(5)
);
} else { // other passes use prior data
pass.gamma0.emplace_back(ctx, i,
this->alpha1.at(6 - j).getImages(),
pass.gamma1.at(j - 1).getImage()
);
pass.gamma1.emplace_back(ctx, i,
pass.gamma0.at(j).getImages(),
pass.gamma1.at(j - 1).getImage(),
this->beta1.getImages().at(6 - j)
);
}
if (j == 4) { // first special pass has no prior data
pass.delta0.emplace_back(ctx, i,
this->alpha1.at(6 - j).getImages(),
this->blackImage,
pass.gamma1.at(j - 1).getImage()
);
pass.delta1.emplace_back(ctx, i,
pass.delta0.at(j - 4).getImages0(),
pass.delta0.at(j - 4).getImages1(),
this->blackImage,
this->beta1.getImages().at(6 - j),
this->blackImage
);
} else if (j > 4) { // further passes do
pass.delta0.emplace_back(ctx, i,
this->alpha1.at(6 - j).getImages(),
pass.delta1.at(j - 5).getImage0(),
pass.gamma1.at(j - 1).getImage()
);
pass.delta1.emplace_back(ctx, i,
pass.delta0.at(j - 4).getImages0(),
pass.delta0.at(j - 4).getImages1(),
pass.delta1.at(j - 5).getImage0(),
this->beta1.getImages().at(6 - j),
pass.delta1.at(j - 5).getImage1()
);
}
}
pass.generate.emplace(ctx, i,
this->sourceImages,
pass.gamma1.at(6).getImage(),
pass.delta1.at(2).getImage0(),
pass.delta1.at(2).getImage1(),
this->destImages.at(i)
vk.queue.submit(
{{
.pNext = &timelineInfo,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &*internal.first,
.pWaitDstStageMask = &waitStage,
.commandBufferCount = 1,
.pCommandBuffers = &*cmdbufs.at(1),
.signalSemaphoreCount = 1,
.pSignalSemaphores = &*sync.first
}},
i == (total - 1) ? *ctx.fence : nullptr,
*vk.dld
);
}
// initialize all images
std::vector<VkImage> images{};
images.push_back(this->blackImage.handle());
mipmaps.prepare(images);
for (size_t i = 0; i < 7; ++i) {
alpha0.at(i).prepare(images);
alpha1.at(i).prepare(images);
}
beta0.prepare(images);
beta1.prepare(images);
for (const auto& pass : this->passes) {
for (size_t i = 0; i < 7; ++i) {
pass.gamma0.at(i).prepare(images);
pass.gamma1.at(i).prepare(images);
if (i < 4) continue;
pass.delta0.at(i - 4).prepare(images);
pass.delta1.at(i - 4).prepare(images);
}
}
std::vector<vk::Barrier> barriers{};
barriers.reserve(images.size());
for (const auto& image : images) {
barriers.emplace_back(vk::Barrier {
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange = {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.levelCount = 1,
.layerCount = 1
}
});
}
const vk::CommandBuffer cmdbuf{ctx.vk};
cmdbuf.begin(ctx.vk);
cmdbuf.insertBarriers(ctx.vk, barriers);
cmdbuf.end(ctx.vk);
cmdbuf.submit(ctx.vk); // wait for completion
}
void Instance::scheduleFrames(Context& context) { // NOLINT (static)
#ifdef LSFGVK_TESTING_RENDERDOC
const auto& impl = this->m_impl;
if (impl->getRenderDocAPI()) {
impl->getRenderDocAPI()->StartFrameCapture(
RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(impl->getVulkan().inst()),
nullptr);
}
#endif
void Context::idle() const {
    // Block until the device has finished all submitted work.
    const auto& vulkan = this->m_priv->instance.get().vk;
    vulkan.device->waitIdle(*vulkan.dld);
}
Context::~Context() {
try {
context.scheduleFrames();
} catch (const std::exception& e) {
throw backend::error("Unable to schedule frames", e);
// NOTE: This will freeze if the user didn't signal the sync semaphore high enough to
// allow the pipeline to complete.
this->idle();
} catch (...) { // NOLINT (empty catch)
// Not much we can do here..
}
#ifdef LSFGVK_TESTING_RENDERDOC
if (impl->getRenderDocAPI()) {
impl->getVulkan().df().DeviceWaitIdle(impl->getVulkan().dev());
impl->getRenderDocAPI()->EndFrameCapture(
RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(impl->getVulkan().inst()),
nullptr);
}
#endif
}
void Context::scheduleFrames() {
// wait for previous pre-pass to complete
if (this->fidx && !this->cmdbufFence.wait(this->ctx.vk))
throw backend::error("Timeout waiting for previous frame to complete");
this->cmdbufFence.reset(this->ctx.vk);
// schedule pre-pass
const auto& cmdbuf = this->cmdbufs.at(0);
cmdbuf.begin(ctx.vk);
this->mipmaps.render(ctx.vk, cmdbuf, this->fidx);
for (size_t i = 0; i < 7; ++i) {
this->alpha0.at(6 - i).render(ctx.vk, cmdbuf);
this->alpha1.at(6 - i).render(ctx.vk, cmdbuf, this->fidx);
}
this->beta0.render(ctx.vk, cmdbuf, this->fidx);
this->beta1.render(ctx.vk, cmdbuf);
cmdbuf.end(ctx.vk);
cmdbuf.submit(this->ctx.vk,
{}, this->syncSemaphore.handle(), this->idx,
{}, this->prepassSemaphore.handle(), this->idx
);
this->idx++;
// schedule main passes
for (size_t i = 0; i < this->destImages.size(); i++) {
const auto& cmdbuf = this->cmdbufs.at(i + 1);
cmdbuf.begin(ctx.vk);
const auto& pass = this->passes.at(i);
for (size_t j = 0; j < 7; j++) {
pass.gamma0.at(j).render(ctx.vk, cmdbuf, this->fidx);
pass.gamma1.at(j).render(ctx.vk, cmdbuf);
if (j < 4) continue;
pass.delta0.at(j - 4).render(ctx.vk, cmdbuf, this->fidx);
pass.delta1.at(j - 4).render(ctx.vk, cmdbuf);
}
pass.generate->render(ctx.vk, cmdbuf, this->fidx);
cmdbuf.end(ctx.vk);
cmdbuf.submit(this->ctx.vk,
{}, this->prepassSemaphore.handle(), this->idx - 1,
{}, this->syncSemaphore.handle(), this->idx + i,
i == this->destImages.size() - 1 ? this->cmdbufFence.handle() : VK_NULL_HANDLE
);
}
this->idx += this->destImages.size();
this->fidx++;
/// @return the raw VkInstance handle (non-owning; owned by this instance)
VkInstance Instance::_instance() const {
return this->m_priv->vk.instance.get();
}
void Instance::closeContext(const Context& context) {
auto it = std::ranges::find_if(this->m_contexts,
[context = &context](const std::unique_ptr<ContextImpl>& ctx) {
return ctx.get() == context;
});
if (it == this->m_contexts.end())
throw backend::error("attempted to close unknown context",
std::runtime_error("no such context"));
const auto& vk = this->m_impl->getVulkan();
vk.df().DeviceWaitIdle(vk.dev());
this->m_contexts.erase(it);
/// @return the raw VkDevice handle (non-owning; owned by this instance)
VkDevice Instance::_device() const {
return *this->m_priv->vk.device;
}
Instance::~Instance() = default;
// leaking shenanigans
namespace {
bool leaking{false}; // NOLINT (global variable)
}
InstanceImpl::~InstanceImpl() {
if (!leaking) return;
try {
new vk::Vulkan(std::move(this->vk));
} catch (...) {
std::cerr << "lsfg-vk: failed to leak Vulkan instance\n";
}
}
void backend::makeLeaking() {
leaking = true;
}

View file

@ -0,0 +1,59 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#define LSFGVK_PRIV
#include "lsfg-vk/lsfgvk.hpp" // IWYU pragma: export
#include "modules/library.hpp"
#include "modules/pipeline.hpp"
#include "utility/vkhelper.hpp"
#include <cstdint>
#include <functional>
#include <memory>
#include <utility>
namespace lsfgvk::priv {
/// Internal state of lsfg-vk
/// NOTE: member order matters — these structs are constructed with
/// designated initializers, which require declaration order.
struct Instance {
/// Vulkan context
struct Vulkan {
/// Vulkan dispatch loader
std::unique_ptr<vk::detail::DispatchLoaderDynamic> dld;
/// Vulkan instance (1.2)
vk::UniqueInstance instance;
/// Vulkan physical device
vk::PhysicalDevice physdev;
/// Vulkan device with synchronization2 (extension), external memory & semaphore
/// fd (extension) and timeline semaphores (core) enabled
vk::UniqueDevice device;
/// Compute queue
vk::Queue queue;
/// Compute queue family index
uint32_t qfi;
/// Whether fp16 is enabled and supported (shaderFloat16 is enabled)
bool fp16;
} vk;
/// Shader library
library::ShaderLibrary shaderLibrary;
};
/// Internal context for frame generation
struct Context {
/// Parent instance
std::reference_wrapper<Instance> instance;
/// Pipeline instance
pipeline::Pipeline pipeline;
/// Shared synchronization semaphore paired with its current timeline value
std::pair<vk::UniqueSemaphore, uint64_t> syncSemaphore;
/// Internal synchronization semaphore paired with its current timeline value
std::pair<vk::UniqueSemaphore, uint64_t> internalSemaphores;
/// Frames-in-flight fence
vk::UniqueFence fence;
/// Is first iteration (true until the first dispatch has run)
bool firstIteration{true};
};
}

View file

@ -0,0 +1,91 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "library.hpp"
#include "library/dll.hpp"
#include "utility/vkhelper.hpp"
#include <array>
#include <cstdint>
#include <filesystem>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>
/// All base shaders in the library.
/// The second element is the raw resource id inside the shader DLL
/// (id 0 is remapped to a special resource id at load time).
const std::array<std::pair<std::string_view, uint32_t>, 3> BASE_LIBRARY{{
{ "mipmaps", 0 },
{ "generate_8bit", 1 },
{ "generate_16bit", 2 },
}};
/// All non-base shaders in the library.
/// The second element is the base resource id; the half-precision variant
/// lives at +48 and the performance variant at +24 relative to the chosen
/// precision (see the ShaderLibrary constructor).
const std::array<std::pair<std::string_view, uint32_t>, 24> LIBRARY{{
{ "alpha0", 13 },
{ "alpha1", 14 },
{ "alpha2", 15 },
{ "alpha3", 16 },
{ "beta0", 22 },
{ "beta1", 23 },
{ "beta2", 24 },
{ "beta3", 25 },
{ "beta4", 26 },
{ "gamma0", 3 },
{ "gamma1", 4 },
{ "gamma2", 5 },
{ "gamma3", 6 },
{ "gamma4", 7 },
{ "delta0", 8 },
{ "delta1", 9 },
{ "delta2", 10 },
{ "delta3", 11 },
{ "delta4", 12 },
{ "epsilon0", 17 },
{ "epsilon1", 18 },
{ "epsilon2", 19 },
{ "epsilon3", 20 },
{ "epsilon4", 21 }
}};
using namespace lsfgvk::library;
ShaderLibrary::ShaderLibrary(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    bool halfPrecision,
    const std::filesystem::path& dll
) {
    if (!std::filesystem::exists(dll)) {
        throw std::runtime_error("The specified shader DLL does not exist");
    }

    // Create shader modules for each shader in the library
    const auto resources = priv::parseDll(dll);

    for (const auto& [name, idx] : BASE_LIBRARY) {
        // The mipmaps shader (idx 0) lives under a special resource id.
        // NOTE(review): 2147488584U has the high bit set, which looks like a
        // named resource entry — confirm against the DLL resource layout.
        const uint32_t rid = idx == 0 ? 2147488584U : idx;
        // take the iterator by value; binding a const& to the temporary
        // returned by find() is legal but misleading
        const auto it = resources.find(rid);
        if (it == resources.end())
            throw std::runtime_error(
                "Unable to find base shader '" + std::string(name) + "' in DLL"
            );
        this->m_baseShaders[name] = vkhelper::createShaderModule(dld, device, it->second);
    }

    for (const auto& [name, idx] : LIBRARY) {
        // Half-precision variants are offset by 48; the performance variant
        // of a shader always lives 24 ids after its quality variant.
        const uint32_t qualityId = idx + (halfPrecision ? 48 : 0);
        const uint32_t performanceId = qualityId + 24;
        const auto qit = resources.find(qualityId);
        const auto pit = resources.find(performanceId);
        if (qit == resources.end() || pit == resources.end())
            throw std::runtime_error(
                "Unable to find shader '" + std::string(name) + "' in DLL"
            );
        this->m_qualityShaders[name] = vkhelper::createShaderModule(dld, device, qit->second);
        this->m_performanceShaders[name] = vkhelper::createShaderModule(dld, device, pit->second);
    }
}

View file

@ -0,0 +1,67 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include "utility/vkhelper.hpp"
#include <filesystem>
#include <string_view>
#include <unordered_map>
namespace lsfgvk::library {
///
/// The lsfg-vk shader library
///
/// NOTE(review): the maps are keyed by std::string_view — the keys
/// presumably reference static-storage names (the library tables in the
/// implementation file); verify no caller inserts views into short-lived
/// strings.
///
class ShaderLibrary {
public:
///
/// Create the shader library
///
/// @param dld Vulkan dynamic dispatch loader
/// @param device Vulkan device
/// @param halfPrecision Whether to load the half-precision shader variants
/// @param dll Path to the shader DLL file
/// @throws std::runtime_error on failure
///
explicit ShaderLibrary(
const vk::detail::DispatchLoaderDynamic& dld,
const vk::Device& device,
bool halfPrecision,
const std::filesystem::path& dll
);
///
/// Get a base shader by name
///
/// @param name Shader name
/// @return A reference to the shader
/// @throws std::out_of_range if the shader is not found
///
[[nodiscard]] const auto& baseShader(std::string_view name) const {
return this->m_baseShaders.at(name);
}
///
/// Get a shader by name
///
/// Base shaders take priority: if `name` is a base shader it is returned
/// regardless of `perf`.
///
/// @param name Shader name
/// @param perf Whether to get the performance variant of the shader
/// @return A reference to the shader
/// @throws std::out_of_range if the shader is not found
///
[[nodiscard]] const auto& shader(std::string_view name, bool perf) const {
auto it{this->m_baseShaders.find(name)};
if (it != this->m_baseShaders.end())
return it->second;
return perf ? this->m_performanceShaders.at(name) : this->m_qualityShaders.at(name);
}
private:
// shared variants independent of quality/performance mode
std::unordered_map<std::string_view, vk::UniqueShaderModule> m_baseShaders;
// quality-mode variants
std::unordered_map<std::string_view, vk::UniqueShaderModule> m_qualityShaders;
// performance-mode variants
std::unordered_map<std::string_view, vk::UniqueShaderModule> m_performanceShaders;
};
}

View file

@ -1,31 +1,27 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "dll_reader.hpp"
#include "lsfg-vk-common/helpers/errors.hpp"
#include "dll.hpp"
#include <ios>
#include <unordered_map>
#include <filesystem>
#include <algorithm>
#include <iostream>
#include <optional>
#include <array>
#include <cstddef>
#include <cstdint>
#include <filesystem>
#include <fstream>
#include <ios>
#include <iostream>
#include <optional>
#include <span>
#include <stdexcept>
#include <unordered_map>
#include <utility>
#include <vector>
#include <array>
#include <span>
using namespace lsfgvk;
using namespace lsfgvk::backend;
namespace {
/// DOS file header
struct DOSHeader {
uint16_t magic; // 0x5A4D
std::array<uint16_t, 29> pad;
int32_t pe_offset; // file offset
int32_t pe_offset; // File offset
};
/// PE header
@ -42,15 +38,15 @@ namespace {
struct PEOptionalHeader {
uint16_t magic; // 0x20B
std::array<uint16_t, 63> pad4;
std::pair<uint32_t, uint32_t> resource_table; // file offset/size
std::pair<uint32_t, uint32_t> resource_table; // File offset/size
};
/// Section header
struct SectionHeader {
std::array<uint16_t, 4> pad1;
uint32_t vsize; // virtual
uint32_t vsize; // Virtual
uint32_t vaddress;
uint32_t fsize; // raw
uint32_t fsize; // Raw
uint32_t foffset;
std::array<uint16_t, 8> pad2;
};
@ -65,7 +61,7 @@ namespace {
/// Resource directory entry
struct ResourceDirectoryEntry {
uint32_t id;
uint32_t offset; // high bit = directory
uint32_t offset; // High bit = Directory
};
/// Resource data entry
@ -74,68 +70,68 @@ namespace {
uint32_t size;
std::array<uint32_t, 2> pad;
};
}
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunknown-warning-option"
#pragma clang diagnostic ignored "-Wunsafe-buffer-usage-in-container"
namespace {
/// Safely cast a vector to a pointer of type T
template<typename T>
const T* safe_cast(const std::vector<uint8_t>& data, size_t offset) {
const size_t end = offset + sizeof(T);
const size_t end{offset + sizeof(T)};
if (end > data.size() || end < offset)
throw ls::error("buffer overflow/underflow during safe cast");
return reinterpret_cast<const T*>(&data.at(offset));
throw std::runtime_error("Buffer overflow/underflow during safe cast");
return reinterpret_cast<const T*>(&data.at(offset)); // NOLINT (unsafe cast)
}
/// Safely cast a vector to a span of T
template<typename T>
std::span<const T> span_cast(const std::vector<uint8_t>& data, size_t offset, size_t count) {
const size_t end = offset + (count * sizeof(T));
const size_t end{offset + (count * sizeof(T))};
if (end > data.size() || end < offset)
throw ls::error("buffer overflow/underflow during safe cast");
return std::span<const T>(reinterpret_cast<const T*>(&data.at(offset)), count);
throw std::runtime_error("Buffer overflow/underflow during safe cast");
return{ reinterpret_cast<const T*>(&data.at(offset)), count }; // NOLINT (unsafe cast)
}
}
#pragma clang diagnostic pop
}
std::unordered_map<uint32_t, std::vector<uint8_t>> backend::extractResourcesFromDLL(
const std::filesystem::path& dll) {
using namespace lsfgvk::library;
std::unordered_map<uint32_t, std::vector<uint32_t>> priv::parseDll(
const std::filesystem::path& dll
) {
std::ifstream file(dll, std::ios::binary | std::ios::ate);
if (!file.is_open())
throw ls::error("failed to open dll file");
throw std::runtime_error("Unable to open file");
const std::streamsize size = static_cast<std::streamsize>(file.tellg());
const std::streamsize size{static_cast<std::streamsize>(file.tellg())};
file.seekg(0, std::ios::beg);
std::vector<uint8_t> data(static_cast<size_t>(size));
if (!file.read(reinterpret_cast<char*>(data.data()), size))
throw ls::error("failed to read dll file");
if (!file.read(reinterpret_cast<char*>(data.data()), size)) // NOLINT (unsafe cast)
throw std::runtime_error("Unable to read file");
// parse dos header
size_t fileOffset = 0;
const auto* dosHdr = safe_cast<const DOSHeader>(data, 0);
// Parse dos header
size_t fileOffset{0};
const auto* dosHdr{safe_cast<const DOSHeader>(data, 0)};
if (dosHdr->magic != 0x5A4D)
throw ls::error("dos header magic number is incorrect");
throw std::runtime_error("Magic number in DOS header is incorrect");
// parse pe header
// Parse pe header
fileOffset += static_cast<size_t>(dosHdr->pe_offset);
const auto* peHdr = safe_cast<const PEHeader>(data, fileOffset);
const auto* peHdr{safe_cast<const PEHeader>(data, fileOffset)};
if (peHdr->signature != 0x00004550)
throw ls::error("pe header signature is incorrect");
throw std::runtime_error("Signature in PE header is incorrect");
// parse optional pe header
// Parse optional pe header
fileOffset += sizeof(PEHeader);
const auto* peOptHdr = safe_cast<const PEOptionalHeader>(data, fileOffset);
const auto* peOptHdr{safe_cast<const PEOptionalHeader>(data, fileOffset)};
if (peOptHdr->magic != 0x20B)
throw ls::error("pe format is not PE32+");
throw std::runtime_error("PE format is not PE32+");
const auto& [rsrc_rva, rsrc_size] = peOptHdr->resource_table;
// locate section containing resources
// Locate section containing resources
std::optional<size_t> rsrc_offset;
fileOffset += peHdr->opt_hdr_size;
const auto sectHdrs = span_cast<const SectionHeader>(data, fileOffset, peHdr->sect_count);
const auto sectHdrs{span_cast<const SectionHeader>(data, fileOffset, peHdr->sect_count)};
for (const auto& sectHdr : sectHdrs) {
if (rsrc_rva < sectHdr.vaddress || rsrc_rva > (sectHdr.vaddress + sectHdr.vsize))
continue;
@ -144,69 +140,71 @@ std::unordered_map<uint32_t, std::vector<uint8_t>> backend::extractResourcesFrom
break;
}
if (!rsrc_offset)
throw ls::error("unable to locate resource section");
throw std::runtime_error("Unable to locate resource section");
// parse resource directory
// Parse resource directory
fileOffset = rsrc_offset.value();
const auto* rsrcDir = safe_cast<const ResourceDirectory>(data, fileOffset);
const auto* rsrcDir{safe_cast<const ResourceDirectory>(data, fileOffset)};
if (rsrcDir->id_count < 3)
throw ls::error("resource directory does not have enough entries");
throw std::runtime_error("Resource directory does not have enough entries");
// find resource table with data type
// Find resource table with data type
std::optional<size_t> rsrc_tbl_offset;
fileOffset = rsrc_offset.value() + sizeof(ResourceDirectory);
const auto rsrcDirEntries = span_cast<const ResourceDirectoryEntry>(
data, fileOffset, rsrcDir->name_count + rsrcDir->id_count);
const auto rsrcDirEntries{span_cast<const ResourceDirectoryEntry>(
data, fileOffset, rsrcDir->name_count + rsrcDir->id_count)};
for (const auto& rsrcDirEntry : rsrcDirEntries) {
if (rsrcDirEntry.id != 10) // RT_RCDATA
continue;
if ((rsrcDirEntry.offset & 0x80000000) == 0)
throw ls::error("expected resource directory, found data entry");
throw std::runtime_error("Expected resource directory, found data entry");
rsrc_tbl_offset.emplace(rsrcDirEntry.offset & 0x7FFFFFFF);
}
if (!rsrc_tbl_offset)
throw ls::error("unabele to locate RT_RCDATA directory");
throw std::runtime_error("Unable to locate RT_RCDATA directory");
// parse data type resource directory
// Parse data type resource directory
fileOffset = rsrc_offset.value() + rsrc_tbl_offset.value();
const auto* rsrcTbl = safe_cast<const ResourceDirectory>(data, fileOffset);
const auto* rsrcTbl{safe_cast<const ResourceDirectory>(data, fileOffset)};
if (rsrcTbl->id_count < 1)
throw ls::error("RT_RCDATA directory does not have enough entries");
throw std::runtime_error("RT_RCDATA directory does not have enough entries");
// collect all resources
// Collect all resources
fileOffset += sizeof(ResourceDirectory);
const auto rsrcTblEntries = span_cast<const ResourceDirectoryEntry>(
data, fileOffset, rsrcTbl->name_count + rsrcTbl->id_count);
std::unordered_map<uint32_t, std::vector<uint8_t>> resources;
const auto rsrcTblEntries{span_cast<const ResourceDirectoryEntry>(
data, fileOffset, rsrcTbl->name_count + rsrcTbl->id_count)};
std::unordered_map<uint32_t, std::vector<uint32_t>> resources;
resources.reserve(rsrcTbl->id_count);
for (const auto& rsrcTblEntry : rsrcTblEntries) {
if ((rsrcTblEntry.offset & 0x80000000) == 0)
throw ls::error("expected resource directory, found data entry");
throw std::runtime_error("Expected resource directory, found data entry");
// skip over language directory
// Skip over language directory
fileOffset = rsrc_offset.value() + (rsrcTblEntry.offset & 0x7FFFFFFF);
const auto* langDir = safe_cast<const ResourceDirectory>(data, fileOffset);
if (langDir->id_count < 1)
throw ls::error("Incorrect language directory");
throw std::runtime_error("Malformed language directory");
fileOffset += sizeof(ResourceDirectory);
const auto* langDirEntry = safe_cast<const ResourceDirectoryEntry>(data, fileOffset);
const auto* langDirEntry{safe_cast<const ResourceDirectoryEntry>(data, fileOffset)};
if ((langDirEntry->offset & 0x80000000) != 0)
throw ls::error("expected resource data entry, but found directory");
throw std::runtime_error("Expected resource data entry, found directory");
// parse resource data entry
// Parse resource data entry
fileOffset = rsrc_offset.value() + (langDirEntry->offset & 0x7FFFFFFF);
const auto* entry = safe_cast<const ResourceDataEntry>(data, fileOffset);
const auto* entry{safe_cast<const ResourceDataEntry>(data, fileOffset)};
if (entry->offset < rsrc_rva || entry->offset > (rsrc_rva + rsrc_size))
throw ls::error("resource data entry points outside resource section");
throw std::runtime_error("Resource data entry points outside resource section");
// extract resource
std::vector<uint8_t> resource(entry->size);
// Extract resource
fileOffset = (entry->offset - rsrc_rva) + rsrc_offset.value();
if (fileOffset + entry->size > data.size())
throw ls::error("resource data entry points outside file");
std::copy_n(&data.at(fileOffset), entry->size, resource.data());
resources.emplace(rsrcTblEntry.id, std::move(resource));
const auto rdata{span_cast<const uint32_t>(
data, fileOffset, entry->size / sizeof(uint32_t))};
resources.emplace(rsrcTblEntry.id, std::vector<uint32_t>(rdata.begin(), rdata.end()));
}
return resources;

View file

@ -0,0 +1,23 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include <cstdint>
#include <filesystem>
#include <unordered_map>
#include <vector>
namespace lsfgvk::library::priv {

    ///
    /// Parse all resources from a DLL file
    ///
    /// Walks the PE32+ headers of the DLL and extracts every RT_RCDATA
    /// resource, returned as vectors of 32-bit words.
    ///
    /// @param dll File path
    /// @returns Map of resource ID to data
    /// @throws std::runtime_error if the file is invalid or cannot be read
    ///
    std::unordered_map<uint32_t, std::vector<uint32_t>> parseDll(
        const std::filesystem::path& dll
    );

}

View file

@ -0,0 +1,838 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "pipeline.hpp"
#include "library.hpp"
#include "modules/pipeline/signature.hpp"
#include "modules/pipeline/signature/helpers.hpp"
#include "modules/pipeline/signature/image.hpp"
#include "modules/pipeline/signature/pass.hpp"
#include "utility/vkhelper.hpp"
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <numeric>
#include <stdexcept>
#include <string>
#include <string_view>
#include <unordered_map>
#include <utility>
#include <vector>
using namespace lsfgvk::pipeline;
namespace {
    /// Apply an extent-operation chain to either the base or the flow extent
    vk::Extent2D apply(
        const vk::Extent2D& base,
        const vk::Extent2D& flow,
        const ExtentOp& op
    ) {
        // Select the starting extent: flow-scaled or base resolution
        vk::Extent2D extent{op.flow() ? flow : base};
        // Each operation adds a bias, then shifts right (divide by a power of two)
        for (const auto& [bias, shift] : op.operations()) {
            extent.width = (extent.width + bias) >> shift;
            extent.height = (extent.height + bias) >> shift;
        }
        return extent;
    }
}
///
/// Construct the full compute pipeline described by a pipeline signature.
///
/// In order: builds the descriptor set layout, creates all images and plans
/// their (aliased) memory allocations, allocates and binds memory, creates
/// image views, writes the descriptor set, compiles every compute pipeline
/// (backed by an on-disk pipeline cache), groups passes into stages, performs
/// a one-time layout transition of all images to the general layout, and
/// pre-records the per-split command buffers with barriers and dispatches.
///
/// @param dld Vulkan dynamic dispatch loader
/// @param device Vulkan device
/// @param physdev Vulkan physical device
/// @param queue Queue used for the one-time layout-transition submit
/// @param queueFamilyIndex Queue family the command pool is created for
/// @param library Shader library providing the compiled shader modules
/// @param signature Pipeline signature describing images, passes and stages
/// @param extent Base image extent
/// @param flow Scale factor producing the flow-resolution extent
/// @param perf Whether to use the performance shader variants
/// @param hdr Whether to use the HDR image/shader variants
/// @throws std::runtime_error on failure
///
Pipeline::Pipeline(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::PhysicalDevice& physdev,
    const vk::Queue& queue,
    uint32_t queueFamilyIndex,
    const library::ShaderLibrary& library,
    const PipelineSignature& signature,
    vk::Extent2D extent,
    float flow,
    bool perf,
    bool hdr
) {
    // Build the Vulkan descriptor set layout
    // Fixed bindings 0-3: one uniform buffer and three samplers; the
    // signature's descriptors follow from binding 4 onwards.
    uint32_t sampledImageCount{};
    uint32_t storageImageCount{};
    std::vector<vk::DescriptorSetLayoutBinding> bindings;
    bindings.reserve(4 + signature.descriptors.size());
    bindings.push_back({
        .binding = 0,
        .descriptorType = vk::DescriptorType::eUniformBuffer,
        .descriptorCount = 1,
        .stageFlags = vk::ShaderStageFlagBits::eCompute
    });
    for (uint32_t i = 1; i <= 3; i++) {
        bindings.push_back({
            .binding = i,
            .descriptorType = vk::DescriptorType::eSampler,
            .descriptorCount = 1,
            .stageFlags = vk::ShaderStageFlagBits::eCompute
        });
    }
    uint32_t bindingIdx{4};
    for (const auto& binding : signature.descriptors) {
        uint32_t descriptorCount{static_cast<uint32_t>(binding.resources.size())};
        if (descriptorCount == 1) {
            // A single mipmapped resource binds each mip level as its own descriptor
            const auto& image{signature.images.at(binding.resources.front())};
            if (image.flags & ImageFlag::Mipmaps)
                descriptorCount = image.count;
        }
        bindings.push_back({
            .binding = bindingIdx++,
            .descriptorType = binding.type == BindingType::StorageImage ?
                vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage,
            .descriptorCount = descriptorCount,
            .stageFlags = vk::ShaderStageFlagBits::eCompute
        });
        if (binding.type == BindingType::StorageImage)
            storageImageCount += descriptorCount;
        else
            sampledImageCount += descriptorCount;
    }
    auto [layout, pipelineLayout] = vkhelper::createLayout(
        dld,
        device,
        bindings,
        sizeof(PushConstants)
    );
    this->m_layout = {
        .layout = std::move(layout),
        .pipelineLayout = std::move(pipelineLayout)
    };

    // Create the Vulkan images
    vk::DeviceSize alignment{};
    uint32_t types{~0U}; // Intersection of all images' supported memory type bits
    const vk::Extent2D flowExtent{
        static_cast<uint32_t>(static_cast<float>(extent.width) * flow),
        static_cast<uint32_t>(static_cast<float>(extent.height) * flow)
    };
    for (const auto& imageSignature : signature.images) {
        const auto imageIdx{this->m_images.size()};
        auto& image{this->m_images.emplace_back()};
        image = {
            .signature = imageSignature
        };
        const bool hasHdrVariant{image.signature.flags & ImageFlag::HdrVariant};
        const vk::Format format{
            (hasHdrVariant && hdr) ?
                static_cast<vk::Format>(image.signature.hdrFormat) :
                static_cast<vk::Format>(image.signature.format)
        };
        const vk::Extent2D baseExtent{apply(extent, flowExtent, image.signature.extentOp)};
        const vk::ImageUsageFlags usage{
            vk::ImageUsageFlagBits::eStorage | vk::ImageUsageFlagBits::eSampled
        };
        const bool isMipmapped{image.signature.flags & ImageFlag::Mipmaps};
        // Mipmapped images get one subimage per level (each halved in size);
        // non-mipmapped images get a single (possibly layered) subimage.
        for (uint32_t i = 0; i < image.signature.count; i++) {
            const vk::Extent2D imageExtent{
                .width = std::max(baseExtent.width >> i, 1U),
                .height = std::max(baseExtent.height >> i, 1U)
            };
            if (image.signature.flags & (ImageFlag::ExternalInput | ImageFlag::ExternalOutput)) {
                // External images are exportable and get a dedicated allocation
                const bool isInputOr{image.signature.flags & ImageFlag::ExternalInput};
                auto [subimage, allocation] = vkhelper::createExternalImage(
                    dld,
                    device,
                    physdev,
                    imageExtent,
                    format,
                    image.signature.count,
                    usage |
                        (isInputOr ?
                            vk::ImageUsageFlagBits::eTransferDst
                            : vk::ImageUsageFlagBits::eTransferSrc)
                );
                if (isInputOr) {
                    this->m_externalInputs.push_back({
                        .extent = imageExtent,
                        .format = format,
                        .layers = image.signature.count,
                        .image = *subimage,
                        .memory = *allocation
                    });
                } else {
                    this->m_externalOutputs.push_back({
                        .extent = imageExtent,
                        .format = format,
                        .layers = image.signature.count,
                        .image = *subimage,
                        .memory = *allocation
                    });
                }
                image.subimages.push_back({
                    .image = std::move(subimage)
                });
                this->m_externalAllocations[imageIdx] = std::move(allocation);
                break; // There can only be one image
            }
            image.subimages.push_back({
                .image = vkhelper::createImage(
                    dld,
                    device,
                    imageExtent,
                    format,
                    isMipmapped ? 1 : image.signature.count,
                    usage
                )
            });
            if (!isMipmapped) {
                break;
            }
        }
        // Query memory requirements; external images keep dedicated memory
        // and do not participate in the shared alignment/type intersection.
        for (auto& subimage : image.subimages) {
            subimage.memory = device.getImageMemoryRequirements(*subimage.image, dld);
            if (image.signature.flags & (ImageFlag::ExternalInput | ImageFlag::ExternalOutput))
                break;
            alignment = std::max(alignment, subimage.memory.alignment);
            types &= subimage.memory.memoryTypeBits;
        }
    }
    if (types == 0)
        throw std::runtime_error("No compatible memory type found for pipeline images");

    // Fill in image sizes in respect to alignment
    for (auto& image : this->m_images) {
        if (image.signature.flags & (ImageFlag::ExternalInput | ImageFlag::ExternalOutput))
            continue; // External inputs have dedicated allocations
        for (const auto& subimage : image.subimages) {
            image.size += vkhelper::align(subimage.memory.size, alignment);
        }
    }

    // Calculate optimal-ish allocations using heuristics & greedy fit strategy
    // Images are placed largest-first; non-pinned images may alias memory with
    // images whose lifetimes (pass ranges) do not overlap.
    std::vector<size_t> images(signature.images.size());
    std::iota(images.begin(), images.end(), 0);
    std::ranges::sort(images, [&](const auto& a, const auto& b) {
        return this->m_images.at(a).size > this->m_images.at(b).size;
    });
    std::vector<size_t> placements; // Segment indices of allocation 0, sorted by offset
    for (const auto& imageIdx : images) {
        const auto& image{this->m_images.at(imageIdx)};
        if (image.signature.flags & (ImageFlag::ExternalInput | ImageFlag::ExternalOutput))
            continue;
        // Pinned images live in allocation 1, aliasable images in allocation 0
        auto& allocation{
            (image.signature.flags & ImageFlag::Pinned)
                ? this->m_allocations.at(1)
                : this->m_allocations.at(0)
        };
        auto& segment{allocation.segments.emplace_back()};
        vk::DeviceSize size{};
        for (const auto& subimage : image.subimages) {
            const vk::DeviceSize alignedSize{vkhelper::align(subimage.memory.size, alignment)};
            segment.subsegments.push_back({
                .size = alignedSize,
                .offset = size
            });
            size += alignedSize;
        }
        if (image.signature.flags & ImageFlag::Pinned) {
            // Pinned: simply append at the end of the pinned allocation
            segment = {
                .imageIdx = imageIdx,
                .subsegments = segment.subsegments,
                .size = size,
                .offset = allocation.size,
            };
            allocation.size += size;
        } else {
            // Aliasable: slide the segment past every already-placed segment
            // whose lifetime AND address range overlap (placements is sorted
            // by offset, so a single pass suffices).
            const auto lifetime{image.signature.lifetime};
            vk::DeviceSize offset{};
            for (const auto& otherSegmentIdx : placements) {
                const auto& otherSegment{allocation.segments.at(otherSegmentIdx)};
                if (otherSegment.imageIdx == imageIdx)
                    continue; // Skip self
                const auto& otherImage{this->m_images.at(otherSegment.imageIdx)};
                const auto& otherLifetime{otherImage.signature.lifetime};
                if (lifetime.first > otherLifetime.second ||
                        lifetime.second < otherLifetime.first)
                    continue; // Skip horizontally non-overlapping
                if (offset >= (otherSegment.offset + otherSegment.size) ||
                        otherSegment.offset >= (offset + size))
                    continue; // Skip vertically non-overlapping
                offset = otherSegment.offset + otherSegment.size;
            }
            allocation.size = std::max(allocation.size, offset + size);
            segment = {
                .imageIdx = imageIdx,
                .subsegments = segment.subsegments,
                .size = size,
                .offset = offset,
            };
            // Keep placements sorted by segment offset
            const size_t i{allocation.segments.size() - 1};
            auto it{std::ranges::upper_bound(placements, i,
                [&](const auto& a, const auto& b) {
                    return allocation.segments.at(a).offset < allocation.segments.at(b).offset;
                }
            )};
            placements.insert(it, i);
        }
    }

    // Allocate the memory & bind the images
    for (auto& allocation : this->m_allocations) {
        allocation.memory = vkhelper::allocateMemory(
            dld,
            device,
            physdev,
            allocation.size,
            types
        );
        for (const auto& segment : allocation.segments) {
            const auto& image{this->m_images.at(segment.imageIdx)};
            for (size_t i = 0; i < image.subimages.size(); i++) {
                const auto& subsegment{segment.subsegments.at(i)};
                const auto& subimage{image.subimages.at(i)};
                device.bindImageMemory(
                    *subimage.image,
                    *allocation.memory,
                    segment.offset + subsegment.offset,
                    dld
                );
            }
        }
    }

    // Create image views
    for (auto& image : this->m_images) {
        const bool hasHdrVariant{image.signature.flags & ImageFlag::HdrVariant};
        // A single subimage with count > 1 is an arrayed image; mipmapped
        // images have one subimage (and view) per level instead.
        const bool isLayered{image.subimages.size() == 1 && image.signature.count > 1};
        for (auto& subimage : image.subimages) {
            subimage.view = vkhelper::createImageView(
                dld,
                device,
                *subimage.image,
                static_cast<vk::Format>((hasHdrVariant && hdr)
                    ? image.signature.hdrFormat : image.signature.format),
                isLayered ? image.signature.count : 1
            );
        }
    }

    // Create the descriptor set & required resources
    auto [pool, set] = vkhelper::createDescriptorSet(
        dld,
        device,
        *this->m_layout.layout,
        3, 1, sampledImageCount, storageImageCount
    );
    this->m_descriptorSet.pool = std::move(pool);
    this->m_descriptorSet.set = set;
    const UniformBuffer buf{
        .advancedColorKind = hdr ? 2U : 0U,
        .hdrSupport = hdr ? 1U : 0U,
        .resolutionInvScale = 1.0F / flow,
        .uiThreshold = 0.5F
    };
    this->m_descriptorSet.buffer = vkhelper::createBuffer(
        dld,
        device,
        physdev,
        buf
    );
    // Persistently map the uniform buffer; the shared_ptr deleter unmaps it
    auto* mapped{static_cast<UniformBuffer*>(
        device.mapMemory(
            *this->m_descriptorSet.buffer.second,
            0,
            VK_WHOLE_SIZE,
            {},
            dld
        )
    )};
    this->m_descriptorSet.mappedBuffer = std::shared_ptr<UniformBuffer*>(
        new UniformBuffer*{mapped},
        [device, memory = *this->m_descriptorSet.buffer.second, dld](auto* ptr) {
            device.unmapMemory(memory, dld);
            delete ptr; // NOLINT (manual memory management)
        }
    );
    // Sampler 0: clamp-to-border, nearest; 1: clamp-to-border, linear;
    // 2: clamp-to-edge with always-pass compare
    this->m_descriptorSet.samplers.at(0) = vkhelper::createSampler(
        dld,
        device,
        vk::SamplerAddressMode::eClampToBorder,
        vk::CompareOp::eNever,
        false
    );
    this->m_descriptorSet.samplers.at(1) = vkhelper::createSampler(
        dld,
        device,
        vk::SamplerAddressMode::eClampToBorder,
        vk::CompareOp::eNever,
        true
    );
    this->m_descriptorSet.samplers.at(2) = vkhelper::createSampler(
        dld,
        device,
        vk::SamplerAddressMode::eClampToEdge,
        vk::CompareOp::eAlways,
        false
    );

    // Update descriptor set bindings
    std::vector<vk::WriteDescriptorSet> writeInfos(4 + signature.descriptors.size());
    bindingIdx = 0;
    std::array<vk::DescriptorBufferInfo, 1> bufferInfos;
    bufferInfos.at(0) = {
        .buffer = *this->m_descriptorSet.buffer.first,
        .range = VK_WHOLE_SIZE
    };
    writeInfos.at(0) = {
        .dstSet = this->m_descriptorSet.set,
        .dstBinding = bindingIdx++,
        .descriptorCount = 1,
        .descriptorType = vk::DescriptorType::eUniformBuffer,
        .pBufferInfo = bufferInfos.data()
    };
    std::array<vk::DescriptorImageInfo, 3> samplerInfos;
    for (uint32_t i = 0; i < 3; i++) {
        auto& writeInfo{writeInfos.at(bindingIdx)};
        samplerInfos.at(i) = {
            .sampler = *this->m_descriptorSet.samplers.at(i)
        };
        writeInfo = {
            .dstSet = this->m_descriptorSet.set,
            .dstBinding = bindingIdx++,
            .descriptorCount = 1,
            .descriptorType = vk::DescriptorType::eSampler,
            .pImageInfo = &samplerInfos.at(i)
        };
    }
    // imageInfos2D keeps the per-binding info arrays alive until the
    // updateDescriptorSets call below
    std::vector<std::vector<vk::DescriptorImageInfo>> imageInfos2D(signature.descriptors.size());
    for (const auto& binding : signature.descriptors) {
        auto& writeInfo{writeInfos.at(bindingIdx)};
        auto& imageInfos{imageInfos2D.at(bindingIdx - 4)};
        imageInfos.reserve(binding.resources.size());
        for (const auto& resourceIdx : binding.resources) {
            const auto& image{this->m_images.at(resourceIdx)};
            for (const auto& subimage : image.subimages) {
                imageInfos.push_back({
                    .imageView = *subimage.view,
                    .imageLayout = vk::ImageLayout::eGeneral
                });
            }
        }
        writeInfo = {
            .dstSet = this->m_descriptorSet.set,
            .dstBinding = bindingIdx++,
            .descriptorCount = static_cast<uint32_t>(imageInfos.size()),
            .descriptorType = binding.type == BindingType::StorageImage ?
                vk::DescriptorType::eStorageImage : vk::DescriptorType::eSampledImage,
            .pImageInfo = imageInfos.data()
        };
    }
    device.updateDescriptorSets(writeInfos, {}, dld);

    // Build all shader pipelines
    std::vector<vk::ComputePipelineCreateInfo> pipelineCreateInfos;
    for (const auto& [name, variant] : signature.shaders) {
        // Variant shaders have bit-depth-suffixed module names
        std::string name2{name};
        if (variant) name2 += hdr ? "_16bit" : "_8bit";
        const auto& module{library.shader(name2, perf)};
        pipelineCreateInfos.push_back({
            .stage = {
                .stage = vk::ShaderStageFlagBits::eCompute,
                .module = *module,
                .pName = "main"
            },
            .layout = *this->m_layout.pipelineLayout
        });
    }
    const std::string_view cacheTag{perf ? "performance" : "quality"};
    auto [cache, isCacheValid] = vkhelper::createPipelineCache(
        dld,
        device,
        physdev,
        cacheTag
    );
    this->m_cache = std::move(cache);
    std::vector<vk::UniquePipeline> pipelines{
        device.createComputePipelinesUnique(
            *this->m_cache,
            pipelineCreateInfos,
            nullptr,
            dld
        ).value
    };
    // Persist the cache only if it was not already valid on disk
    if (!isCacheValid) {
        vkhelper::persistPipelineCache(
            dld,
            device,
            physdev,
            *this->m_cache,
            cacheTag
        );
    }
    this->m_pipelines.reserve(signature.shaders.size());
    for (size_t i = 0; i < signature.shaders.size(); i++) {
        const auto& name{signature.shaders.at(i).first};
        this->m_pipelines.emplace(name, std::move(pipelines.at(i)));
    }

    // Build pipeline stages
    // Consecutive passes sharing a shader are merged into one substage;
    // `indices` numbers the subiterations per shader across all stages.
    std::unordered_map<std::string_view, uint32_t> indices;
    for (const auto& stageSignature : signature.stages) {
        auto& stage{this->m_stages.emplace_back()};
        stage.substages.emplace_back();
        for (const auto& passIdx : stageSignature.passes) { // (Sorted by shader)
            const auto& pass{signature.passes.at(passIdx)};
            for (const auto& resource : pass.inputs) {
                if (!resource.idx())
                    continue;
                stage.sampledImages.push_back(*resource.idx());
            }
            for (const auto& resource : pass.outputs) {
                if (!resource.idx())
                    continue;
                stage.storageImages.push_back(*resource.idx());
            }
            auto& lastPipeline{stage.substages.back().pipeline};
            if (!lastPipeline.empty() && lastPipeline != pass.shader) {
                stage.substages.emplace_back();
            }
            auto& substage{stage.substages.back()};
            substage.pipeline = pass.shader;
            substage.subiterations.push_back({
                .iterationIndex = indices[substage.pipeline]++,
                .dispatch = apply(extent, flowExtent, pass.dispatchOp),
                .isSpecial = pass.flags & PassFlag::Special
            });
        }
    }

    // Transition all images into general layout
    this->m_pool = vkhelper::createCommandPool(
        dld,
        device,
        queueFamilyIndex
    );
    std::vector<vk::ImageMemoryBarrier2KHR> barriers;
    for (const auto& image : this->m_images) {
        for (const auto& subimage : image.subimages) {
            barriers.push_back({
                .newLayout = vk::ImageLayout::eGeneral,
                .image = *subimage.image,
                .subresourceRange = {
                    .aspectMask = vk::ImageAspectFlagBits::eColor,
                    .levelCount = 1,
                    .layerCount = image.subimages.size() == 1 ? image.signature.count : 1
                }
            });
        }
    }
    const auto layoutCmdbuf{
        vkhelper::createCommandBuffer(dld, device, *this->m_pool)
    };
    layoutCmdbuf->begin({ .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit }, dld);
    layoutCmdbuf->pipelineBarrier2KHR({
        .imageMemoryBarrierCount = static_cast<uint32_t>(barriers.size()),
        .pImageMemoryBarriers = barriers.data()
    }, dld);
    layoutCmdbuf->end(dld);
    // Submit and wait (50ms timeout) so the images are ready before recording
    const auto fence{device.createFenceUnique({}, nullptr, dld)};
    queue.submit(
        {{
            .commandBufferCount = 1,
            .pCommandBuffers = &*layoutCmdbuf
        }},
        *fence,
        dld
    );
    if (device.waitForFences(*fence, VK_TRUE, 50'000'000, dld) != vk::Result::eSuccess) {
        throw std::runtime_error("Failed to wait for image layout transition fence");
    }

    // Pre-record one reusable command buffer per split (splitIndices divides
    // the stage list into splitIndices.size() + 1 command buffers)
    for (size_t i = 0; i < signature.splitIndices.size() + 1; i++) {
        auto& cmdbuf{this->m_cmdbufs.emplace_back()};
        cmdbuf = vkhelper::createCommandBuffer(dld, device, *this->m_pool);
        cmdbuf->begin({ .flags = vk::CommandBufferUsageFlagBits::eSimultaneousUse }, dld);
        cmdbuf->bindDescriptorSets(
            vk::PipelineBindPoint::eCompute,
            *this->m_layout.pipelineLayout,
            0,
            this->m_descriptorSet.set,
            {},
            dld
        );
    }
    size_t currentStageIndex{0};
    size_t currentStageBound{
        signature.splitIndices.empty() ? signature.passes.size() : signature.splitIndices.front()
    };
    std::vector<vk::ImageMemoryBarrier2KHR> barrierVector;
    barrierVector.reserve(16);
    // Barriers carried over from the previous stage, keyed by image handle
    std::unordered_map<VkImage, vk::ImageMemoryBarrier2KHR> stageBarriers;
    for (size_t i = 0; i < this->m_stages.size(); i++) {
        if (i == currentStageBound) {
            currentStageIndex++;
            currentStageBound = currentStageIndex < signature.splitIndices.size() ?
                signature.splitIndices.at(currentStageIndex) : signature.passes.size();
        }
        const auto& stage{this->m_stages.at(i)};
        const auto& cmdbuf{this->m_cmdbufs.at(currentStageIndex)};
        // Append barriers for this stage
        // NOTE(review): when an image appears in both the sampled and storage
        // lists of one stage, the storage loop below overwrites dstAccessMask
        // (write replaces read rather than OR-ing) — confirm this is intended.
        for (const auto& sampledImage : stage.sampledImages) {
            const auto& image = this->m_images.at(sampledImage);
            for (const auto& subimage : image.subimages) {
                auto imageHandle{static_cast<const VkImage>(*subimage.image)}; // NOLINT (32-bit)
                if (stageBarriers.contains(imageHandle)) {
                    stageBarriers[imageHandle].dstAccessMask = vk::AccessFlagBits2::eShaderRead;
                    continue;
                }
                stageBarriers[imageHandle] = {
                    .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader,
                    .srcAccessMask = vk::AccessFlagBits2::eNone,
                    .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
                    .dstAccessMask = vk::AccessFlagBits2::eShaderRead,
                    .image = *subimage.image,
                    .subresourceRange = {
                        .aspectMask = vk::ImageAspectFlagBits::eColor,
                        .levelCount = 1,
                        .layerCount = image.subimages.size() == 1 ? image.signature.count : 1
                    }
                };
            }
        }
        for (const auto& storageImage : stage.storageImages) {
            const auto& image = this->m_images.at(storageImage);
            for (const auto& subimage : image.subimages) {
                auto imageHandle{static_cast<const VkImage>(*subimage.image)}; // NOLINT (32-bit)
                if (stageBarriers.contains(imageHandle)) {
                    stageBarriers[imageHandle].dstAccessMask = vk::AccessFlagBits2::eShaderWrite;
                    continue;
                }
                stageBarriers[imageHandle] = {
                    .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader,
                    .srcAccessMask = vk::AccessFlagBits2::eNone,
                    .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
                    .dstAccessMask = vk::AccessFlagBits2::eShaderWrite,
                    .image = *subimage.image,
                    .subresourceRange = {
                        .aspectMask = vk::ImageAspectFlagBits::eColor,
                        .levelCount = 1,
                        .layerCount = image.subimages.size() == 1 ? image.signature.count : 1
                    }
                };
            }
        }
        barrierVector.clear();
        for (const auto& [_, barrier] : stageBarriers) // NOLINT (nondeterministic order)
            barrierVector.push_back(barrier);
        stageBarriers.clear();
        cmdbuf->pipelineBarrier2KHR({
            .imageMemoryBarrierCount = static_cast<uint32_t>(barrierVector.size()),
            .pImageMemoryBarriers = barrierVector.data()
        }, dld);
        for (const auto& substage : stage.substages) {
            // Bind shader pipeline for this stage
            const auto& pipeline = this->m_pipelines.at(substage.pipeline);
            cmdbuf->bindPipeline(vk::PipelineBindPoint::eCompute, *pipeline, dld);
            // Dispatch all subiterations for this stage
            for (const auto& subiteration : substage.subiterations) {
                const PushConstants pushConstants{
                    .specialFlag = subiteration.isSpecial ? 1U : 0U,
                    .subiteration = subiteration.iterationIndex
                };
                cmdbuf->pushConstants(
                    *this->m_layout.pipelineLayout,
                    vk::ShaderStageFlagBits::eCompute,
                    0,
                    sizeof(PushConstants),
                    &pushConstants,
                    dld
                );
                const auto& dispatch{subiteration.dispatch};
                cmdbuf->dispatch(dispatch.width, dispatch.height, 1, dld);
            }
        }
        // Append barriers for next stage
        for (const auto& sampledImage : stage.sampledImages) {
            const auto& image = this->m_images.at(sampledImage);
            for (const auto& subimage : image.subimages) {
                stageBarriers[static_cast<VkImage>(*subimage.image)] = {
                    .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader,
                    .srcAccessMask = vk::AccessFlagBits2::eShaderRead,
                    .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
                    .dstAccessMask = vk::AccessFlagBits2::eShaderRead,
                    .image = *subimage.image,
                    .subresourceRange = {
                        .aspectMask = vk::ImageAspectFlagBits::eColor,
                        .levelCount = 1,
                        .layerCount = image.subimages.size() == 1 ? image.signature.count : 1
                    }
                };
            }
        }
        for (const auto& storageImage : stage.storageImages) {
            const auto& image = this->m_images.at(storageImage);
            for (const auto& subimage : image.subimages) {
                stageBarriers[static_cast<VkImage>(*subimage.image)] = {
                    .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader,
                    .srcAccessMask = vk::AccessFlagBits2::eShaderWrite,
                    .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
                    .dstAccessMask = vk::AccessFlagBits2::eShaderRead,
                    .image = *subimage.image,
                    .subresourceRange = {
                        .aspectMask = vk::ImageAspectFlagBits::eColor,
                        .levelCount = 1,
                        .layerCount = image.subimages.size() == 1 ? image.signature.count : 1
                    }
                };
            }
        }
        // Skip barriers on switch between passes
        if (i + 1 == currentStageBound) {
            stageBarriers.clear();
        }
    }
    for (auto& cmdbuf : this->m_cmdbufs) {
        cmdbuf->end(dld);
    }
}
///
/// Build (or reuse) a command buffer that updates the uniform buffer's
/// timestamp for one interpolated frame, guarded by buffer barriers on
/// the first 4 bytes of the uniform buffer.
///
/// Command buffers are cached in m_transCmdbufs when total > 8 (keyed by
/// index and total); otherwise a one-time-submit buffer is recorded,
/// replacing any previous entry at the same key.
///
/// @param dld Vulkan dynamic dispatch loader
/// @param device Vulkan device
/// @param iteration Iteration counter stored in the staging UniformBuffer
/// @param index Index of the interpolated frame (0-based)
/// @param total Total number of interpolated frames
/// @return The recorded command buffer (owned by this pipeline)
///
vk::CommandBuffer Pipeline::buildTransCmdbuf(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    uint32_t iteration,
    uint32_t index,
    uint32_t total
) {
    // Persist and reuse buffers for large totals; cache key combines index
    // and total so different frame counts never collide
    const bool persist{total > 8};
    const uint64_t key{persist ? ((static_cast<uint64_t>(index) << 32) | total) : index};
    if (persist && this->m_transCmdbufs.contains(key))
        return *this->m_transCmdbufs.at(key);
    auto& cmdbuf{this->m_transCmdbufs[key]};
    cmdbuf = vkhelper::createCommandBuffer(
        dld,
        device,
        *this->m_pool
    );
    cmdbuf->begin({
        .flags = persist ? vk::CommandBufferUsageFlagBits::eSimultaneousUse :
            vk::CommandBufferUsageFlagBits::eOneTimeSubmit
    }, dld);
    // Wait for the previous shader read of the timestamp before overwriting it
    vk::BufferMemoryBarrier2KHR barrier{
        .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader,
        .srcAccessMask = vk::AccessFlagBits2::eUniformRead,
        .dstStageMask = vk::PipelineStageFlagBits2::eTransfer,
        .dstAccessMask = vk::AccessFlagBits2::eTransferWrite,
        .buffer = *this->m_descriptorSet.buffer.first,
        .size = 4
    };
    cmdbuf->pipelineBarrier2KHR({
        .bufferMemoryBarrierCount = 1,
        .pBufferMemoryBarriers = &barrier
    }, dld);
    // Timestamp is the frame's normalized position in (0, 1)
    const UniformBuffer buf{
        .timestamp = static_cast<float>(index + 1) / static_cast<float>(total + 1),
        .iteration = iteration
    };
    // NOTE(review): only 4 bytes starting at &buf.timestamp are copied to
    // buffer offset 0 — the `iteration` member set above is not uploaded
    // here; confirm it is written elsewhere or intentionally unused.
    cmdbuf->updateBuffer(
        *this->m_descriptorSet.buffer.first,
        0,
        4,
        static_cast<const void*>(&buf.timestamp),
        dld
    );
    // Make the new timestamp visible to subsequent shader reads
    barrier = {
        .srcStageMask = vk::PipelineStageFlagBits2::eTransfer,
        .srcAccessMask = vk::AccessFlagBits2::eTransferWrite,
        .dstStageMask = vk::PipelineStageFlagBits2::eComputeShader,
        .dstAccessMask = vk::AccessFlagBits2::eUniformRead,
        .buffer = *this->m_descriptorSet.buffer.first,
        .size = 4
    };
    cmdbuf->pipelineBarrier2KHR({
        .bufferMemoryBarrierCount = 1,
        .pBufferMemoryBarriers = &barrier
    }, dld);
    cmdbuf->end(dld);
    return *cmdbuf;
}

View file

@ -0,0 +1,225 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include "library.hpp"
#include "pipeline/signature.hpp"
#include "pipeline/signature/image.hpp"
#include "utility/vkhelper.hpp"
#include <array>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <string_view>
#include <unordered_map>
#include <utility>
#include <vector>
namespace lsfgvk::pipeline {
/// Handle to an external image (the Vulkan handles are NOT owned here)
struct ExternalImage {
    /// Image extent
    vk::Extent2D extent;
    /// Image format
    vk::Format format;
    /// Amount of layers in image
    uint32_t layers;
    /// Handle to the Vulkan image (not owned)
    vk::Image image;
    /// Handle to the Vulkan memory (not owned)
    vk::DeviceMemory memory;
};
/// Struct for the uniform buffer
/// NOTE(review): field order and packing must match the shader-side uniform
/// block layout (std140/std430) — confirm against the shaders.
struct UniformBuffer {
    float timestamp;          // Interpolation timestamp, first 4 bytes of the buffer
    uint32_t iteration;       // Current iteration counter
    uint32_t advancedColorKind;
    uint32_t hdrSupport;
    float resolutionInvScale;
    float uiThreshold;
};
/// Struct for push constants
/// NOTE(review): must match the push-constant block declared in the shaders.
struct PushConstants {
    uint32_t specialFlag;     // 1 when the sub-iteration is "special", else 0
    uint32_t subiteration;    // Index of the current sub-iteration
};
///
/// Vulkan pipeline created from a signature
///
class Pipeline {
public:
    ///
    /// Create a new pipeline
    ///
    /// @param dld Vulkan dispatch loader
    /// @param device Vulkan device
    /// @param physdev Vulkan physical device
    /// @param queue Vulkan compute queue
    /// @param queueFamilyIndex Compute queue family index
    /// @param library Shader library
    /// @param signature Pipeline signature
    /// @param extent Base extent
    /// @param flow Flow scale
    /// @param perf Performance mode
    /// @param hdr HDR variant
    /// @throws std::runtime_error on failure
    ///
    explicit Pipeline(
        const vk::detail::DispatchLoaderDynamic& dld,
        const vk::Device& device,
        const vk::PhysicalDevice& physdev,
        const vk::Queue& queue,
        uint32_t queueFamilyIndex,
        const library::ShaderLibrary& library,
        const PipelineSignature& signature,
        vk::Extent2D extent,
        float flow,
        bool perf,
        bool hdr
    );
    ///
    /// Get all external input images
    ///
    /// @return List of images
    ///
    [[nodiscard]] auto& getExternalInputs() const {
        return this->m_externalInputs;
    }
    ///
    /// Get all external output images
    ///
    /// @return List of images
    ///
    [[nodiscard]] auto& getExternalOutputs() const {
        return this->m_externalOutputs;
    }
    ///
    /// Get the mapped uniform buffer
    ///
    /// @return Pointer to the mapped UniformBuffer
    ///
    [[nodiscard]] auto* getMappedBuffer() const {
        return *this->m_descriptorSet.mappedBuffer.get();
    }
    ///
    /// Get all command buffers
    ///
    /// @return List of command buffers
    ///
    [[nodiscard]] auto& getCmdbufs() const {
        return this->m_cmdbufs;
    }
    ///
    /// Build a transition command buffer
    ///
    /// @param dld Vulkan dispatch loader
    /// @param device Vulkan device
    /// @param iteration Current iteration
    /// @param index Index of the iteration
    /// @param total Total iterations
    /// @return Command buffer handle
    ///
    vk::CommandBuffer buildTransCmdbuf(
        const vk::detail::DispatchLoaderDynamic& dld,
        const vk::Device& device,
        uint32_t iteration,
        uint32_t index,
        uint32_t total
    );
private:
    /// Vulkan descriptor set & pipeline layout
    struct Layout {
        vk::UniqueDescriptorSetLayout layout;
        vk::UniquePipelineLayout pipelineLayout;
    };
    Layout m_layout;
    /// Sub-image of a Vulkan image
    struct SubImage {
        vk::UniqueImage image;
        vk::MemoryRequirements memory;
        vk::UniqueImageView view;
    };
    /// Vulkan image created from an ImageSignature
    struct Image {
        ImageSignature signature;
        std::vector<SubImage> subimages; // One entry, or one per mip level (Mipmaps flag)
        vk::DeviceSize size{};
    };
    std::vector<Image> m_images;
    std::vector<ExternalImage> m_externalInputs;
    std::vector<ExternalImage> m_externalOutputs;
    /// Memory allocation sub-segment
    struct MemorySubSegment {
        vk::DeviceSize size{};
        vk::DeviceSize offset{}; // Offset in memory segment
    };
    /// Memory allocation segment
    struct MemorySegment {
        size_t imageIdx{};
        std::vector<MemorySubSegment> subsegments;
        vk::DeviceSize size{};
        vk::DeviceSize offset{}; // Offset in allocation
    };
    /// Memory allocation info
    struct AllocationInfo {
        vk::UniqueDeviceMemory memory;
        std::vector<MemorySegment> segments;
        vk::DeviceSize size{};
    };
    std::array<AllocationInfo, 2> m_allocations;
    // Separate allocations for external images, keyed by image index
    std::unordered_map<size_t, vk::UniqueDeviceMemory> m_externalAllocations;
    /// Vulkan descriptor set
    struct DescriptorSet {
        vk::UniqueDescriptorPool pool;
        vk::DescriptorSet set; // Can not be freed
        std::pair<vk::UniqueBuffer, vk::UniqueDeviceMemory> buffer;
        std::shared_ptr<UniformBuffer*> mappedBuffer; // Host-mapped view of `buffer`
        std::array<vk::UniqueSampler, 3> samplers;
    };
    DescriptorSet m_descriptorSet;
    vk::UniquePipelineCache m_cache;
    // Compute pipelines keyed by shader name
    std::unordered_map<std::string_view, vk::UniquePipeline> m_pipelines;
    /// Single iteration of a sub-stage
    struct SubIteration {
        uint32_t iterationIndex{};
        vk::Extent2D dispatch;    // Workgroup counts for vkCmdDispatch
        bool isSpecial{};         // Forwarded to shaders via PushConstants::specialFlag
    };
    /// Sub-stage of an execution stage
    struct SubStage {
        std::string_view pipeline;
        std::vector<SubIteration> subiterations;
    };
    /// Execution stage
    struct Stage {
        std::vector<SubStage> substages;
        std::vector<size_t> sampledImages;
        std::vector<size_t> storageImages;
    };
    std::vector<Stage> m_stages;
    vk::UniqueCommandPool m_pool;
    std::vector<vk::UniqueCommandBuffer> m_cmdbufs;
    // Transition command buffers, keyed by (index << 32 | total) when
    // persistent, or by index alone for one-shot buffers
    std::unordered_map<uint64_t, vk::UniqueCommandBuffer> m_transCmdbufs;
};
}

View file

@ -0,0 +1,340 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include "signature/helpers.hpp"
#include "signature/image.hpp"
#include "signature/pass.hpp"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <numeric>
#include <optional>
#include <ranges>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
namespace lsfgvk::pipeline {
/// Type of a descriptor set binding
enum class BindingType : uint8_t {
    SampledImage,
    StorageImage
};
/// Signature of a descriptor set binding
struct BindingSignature {
    /// Type of binding
    BindingType type{ BindingType::SampledImage };
    /// Resources attached to binding (indices into PipelineSignature::images)
    inplace_vector<size_t, 16> resources;
};
/// Signature of a pipeline stage
struct StageSignature {
    /// Passes executed this stage (indices into PipelineSignature::passes)
    inplace_vector<size_t, 8> passes;
};
///
/// Signature of a compute pipeline
///
/// All containers are fixed-capacity so the whole signature can be built
/// at compile time; the template arguments are upper bounds, not sizes.
///
struct PipelineSignature {
    /// Shader names used by the pipeline (and if there are hdr variants)
    inplace_vector<std::pair<std::string_view, bool>, 32> shaders;
    /// Images used by the pipeline
    inplace_vector<ImageSignature, 192> images;
    /// Ordered set of bindings for the descriptor set
    inplace_vector<BindingSignature, 192> descriptors;
    /// Indexable list of all passes
    inplace_vector<PassSignature, 100> passes;
    /// Ordered list of stages, executed in sequence
    inplace_vector<StageSignature, 100> stages;
    /// Stage indices where the command buffers are split
    inplace_vector<size_t, 4> splitIndices;
};
///
/// The signature of a compute pipeline
///
class PipelineSignatureBuilder {
public:
    ///
    /// Create a new empty signature builder
    ///
    explicit PipelineSignatureBuilder() = default;
    ///
    /// Register an image
    ///
    /// @param image Image signature
    /// @return Handle to the image
    ///
    consteval size_t registerImage(ImageSignature image) {
        this->m_images.push_back(std::move(image));
        return this->m_images.size() - 1;
    }
    ///
    /// Append a pass
    ///
    /// @param pass Pass signature
    /// @return Handle to the pass
    ///
    consteval size_t appendPass(PassSignature pass) {
        this->m_passes.push_back(std::move(pass));
        return this->m_passes.size() - 1;
    }
    ///
    /// Split the command buffer
    ///
    /// All passes appended after this call end up in a later command buffer.
    ///
    consteval void split() {
        this->m_splitIndices.emplace_back(this->m_passes.size());
    }
    ///
    /// Compute a pipeline signature
    ///
    /// @throws std::string on failure (this function is consteval, so any
    ///         throw surfaces as a compile-time error)
    /// @return Pipeline signature
    ///
    consteval PipelineSignature finalize() {
        PipelineSignature s{};
        // Per-shader bookkeeping collected while walking all passes
        struct ShaderInfo {
            std::string_view id;
            bool hasHdrVariant{};
            size_t sampledImageBindings{}; // Only the amount suffices here
            std::vector<std::vector<size_t>> storageImageBindings;
        };
        std::vector<ShaderInfo> shaderInfos;
        // Populate shader map with empty bindings
        for (const auto& pass : this->m_passes) {
            const auto it{std::ranges::find_if(shaderInfos, [&pass](const auto& shader) {
                return shader.id == pass.shader;
            })};
            const bool firstOccurrence{it == shaderInfos.end()};
            const bool isAggregatePass{pass.flags & PassFlag::Aggregate};
            auto& shader{firstOccurrence ? shaderInfos.emplace_back() : *it};
            if (firstOccurrence) {
                shader.id = pass.shader;
                shader.hasHdrVariant = pass.flags & PassFlag::HdrVariant;
                shader.sampledImageBindings = pass.inputs.size();
                shader.storageImageBindings.resize(pass.outputs.size());
            }
            // Ensure consistent usage across invocations
            if (!firstOccurrence && !isAggregatePass)
                throw "Shader \"" + std::string(pass.shader) + "\" is used by "
                    "multiple passes but does not have the Aggregate flag set";
            if (shader.sampledImageBindings != pass.inputs.size())
                throw "Shader \"" + std::string(pass.shader) + "\" has "
                    "inconsistent read counts across passes";
            if (shader.storageImageBindings.size() != pass.outputs.size())
                throw "Shader \"" + std::string(pass.shader) + "\" has "
                    "inconsistent write counts across passes";
            // Collect all used resources written by this shader
            // (empty resources act as placeholders and keep binding slots aligned)
            for (size_t i = 0; i < pass.outputs.size(); i++) {
                const auto& resource{pass.outputs.at(i)};
                if (!resource.idx())
                    continue;
                const auto& image{this->m_images.at(*resource.idx())};
                if (isAggregatePass && (image.flags & ImageFlag::Mipmaps) && !resource.layer())
                    throw "Pass \"" + std::string(pass.shader) + "\" has "
                        "Aggregate flag but fully writes to an image with Mipmaps flag";
                shader.storageImageBindings.at(i).push_back(*resource.idx());
            }
        }
        // Create descriptors for all resources:
        // external inputs first, then one storage (and usually one sampled)
        // binding per shader output slot
        for (size_t i = 0; i < this->m_images.size(); i++) {
            const auto& image{this->m_images.at(i)};
            if (image.flags & ImageFlag::ExternalInput) {
                s.descriptors.push_back({
                    .type = BindingType::SampledImage,
                    .resources = { i }
                });
            }
        }
        for (const auto& shader : shaderInfos) {
            for (const auto& resources : shader.storageImageBindings) {
                s.descriptors.push_back({
                    .type = BindingType::StorageImage,
                    .resources = resources
                });
                // Skip sampled image bindings for external outputs
                const auto& image{this->m_images.at(resources.front())};
                if (image.flags & ImageFlag::ExternalOutput)
                    continue;
                s.descriptors.push_back({
                    .type = BindingType::SampledImage,
                    .resources = resources
                });
            }
        }
        // Calculate pipeline stages by reordering passes with dependencies as constraints
        std::vector<size_t> writtenImages;
        for (size_t i = 0; i < this->m_images.size(); i++) {
            const auto& image{this->m_images.at(i)};
            if (image.flags & ImageFlag::ExternalInput)
                writtenImages.push_back(i);
        }
        std::vector<size_t> remainingPasses(this->m_passes.size());
        std::iota(remainingPasses.begin(), remainingPasses.end(), 0);
        size_t currentStageIndex{0};
        // [first, second) range of pass indices belonging to the current split
        std::pair<size_t, size_t> currentStageBounds{
            0,
            this->m_splitIndices.empty() ? this->m_passes.size() : this->m_splitIndices.front()
        };
        while (!remainingPasses.empty()) {
            auto& currentStage{s.stages.emplace_back()};
            // Find all passes that may be executed next
            // (i.e. all of their inputs have already been written)
            std::vector<size_t> validPasses{};
            for (const auto& passIdx : remainingPasses) {
                if (passIdx < currentStageBounds.first || passIdx >= currentStageBounds.second)
                    continue; // Skip passes that are not in the current stage
                const auto& pass{this->m_passes.at(passIdx)};
                bool isValid{true};
                for (const auto& image : pass.inputs) {
                    if (!image.idx())
                        continue;
                    if (std::ranges::find(writtenImages, *image.idx()) != writtenImages.end())
                        continue;
                    isValid = false;
                    break;
                }
                if (!isValid)
                    continue;
                validPasses.push_back(passIdx);
            }
            // If no valid pass exists in the current stage, move on to the next stage
            if (validPasses.empty() && currentStageIndex < this->m_splitIndices.size()) {
                currentStageIndex++;
                currentStageBounds = {
                    currentStageBounds.second,
                    currentStageIndex < this->m_splitIndices.size() ?
                        this->m_splitIndices.at(currentStageIndex) : this->m_passes.size()
                };
                s.stages.pop_back();
                s.splitIndices.emplace_back(s.stages.size());
                continue;
            }
            // Sort valid passes by shader name
            // (stable insertion sort via upper_bound + rotate, so passes with
            // the same shader keep their discovery order)
            auto begin = std::ranges::begin(validPasses);
            auto end = std::ranges::end(validPasses);
            for (auto i = begin; i != end; i++) {
                std::rotate(
                    std::upper_bound(begin, i, *i, [this](size_t a, size_t b) {
                        return this->m_passes.at(a).shader < this->m_passes.at(b).shader;
                    }),
                    i, std::next(i)
                );
            }
            // Merge passes into execution step
            for (const auto& passIdx : validPasses) {
                const auto& pass{this->m_passes.at(passIdx)};
                for (const auto& resource : pass.outputs) {
                    if (!resource.idx())
                        continue;
                    writtenImages.push_back(*resource.idx());
                }
                currentStage.passes.push_back(passIdx);
                remainingPasses.erase(std::ranges::find(remainingPasses, passIdx));
            }
        }
        // Calculate usage timeline for each image
        for (size_t i = 0; i < this->m_images.size(); i++) {
            auto& image{this->m_images.at(i)};
            if (image.flags & ImageFlag::Pinned)
                continue;
            std::optional<size_t> writeIndex;
            std::optional<size_t> readIndex;
            // Find the first stage that writes to the image and last stage that reads from it
            for (size_t j = 0; j < s.stages.size(); j++) {
                const auto& stage{s.stages.at(j)};
                for (const auto& passIdx : stage.passes) {
                    const auto& pass{this->m_passes.at(passIdx)};
                    const bool isRead{
                        std::ranges::any_of(pass.inputs, [i](const auto& resource) {
                            return resource.idx() && *resource.idx() == i;
                        })
                    };
                    const bool isWritten{
                        std::ranges::any_of(pass.outputs, [i](const auto& resource) {
                            return resource.idx() && *resource.idx() == i;
                        })
                    };
                    if (writeIndex && isWritten)
                        throw "Image " + std::to_string(i) +
                            " is written by multiple passes";
                    if (isWritten && isRead)
                        throw "Image " + std::to_string(i) +
                            " is read & write in the same pass";
                    if (isWritten)
                        writeIndex.emplace(j);
                    if (isRead)
                        readIndex.emplace(std::max(readIndex.value_or(0), j));
                }
            }
            if (!writeIndex)
                throw "Image " + std::to_string(i) + " is not written to by any pass";
            if (!readIndex)
                throw "Image " + std::to_string(i) + " is not read from by any pass";
            image.lifetime = { *writeIndex, *readIndex };
        }
        // Copy remaining resources into signature
        for (const auto& shader : shaderInfos)
            s.shaders.emplace_back(shader.id, shader.hasHdrVariant);
        for (const auto& image : this->m_images)
            s.images.push_back(image);
        for (const auto& pass : this->m_passes)
            s.passes.push_back(pass);
        return s;
    }
private:
    std::vector<ImageSignature> m_images;
    std::vector<PassSignature> m_passes;
    std::vector<size_t> m_splitIndices;
};
}

View file

@ -0,0 +1,128 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include <algorithm>
#include <array>
#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <new>
#include <stdexcept>
#include <utility>
#include <vector>
namespace lsfgvk::pipeline {
/// C++26 backported inplace_vector
///
/// Fixed-capacity vector whose storage lives inline in the object. Exceeding
/// the capacity N throws std::bad_alloc; bounds-checked access throws
/// std::out_of_range. All operations are constexpr-friendly.
template<typename T, size_t N>
class inplace_vector {
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
public:
    // Construction
    constexpr inplace_vector() = default;
    constexpr inplace_vector(std::initializer_list<T> init) {
        if (init.size() > N) throw std::bad_alloc();
        for (const auto& elem : init)
            push_back(elem);
    }
    constexpr inplace_vector(const std::vector<T>& vec) {
        if (vec.size() > N) throw std::bad_alloc();
        for (const auto& elem : vec)
            push_back(elem);
    }
    // Appending elements
    constexpr void push_back(const T& value) {
        ensureSpace();
        this->m_storage.at(this->m_count) = value;
        ++this->m_count;
    }
    constexpr void push_back(T&& value) {
        ensureSpace();
        this->m_storage.at(this->m_count) = std::move(value);
        ++this->m_count;
    }
    template<typename... Args>
    constexpr T& emplace_back(Args&&... args) {
        ensureSpace();
        T& slot = this->m_storage.at(this->m_count);
        slot = T(std::forward<Args>(args)...);
        ++this->m_count;
        return slot;
    }
    constexpr void clear() { this->m_count = 0; }
    // Accessing elements (operator[] checks against N, at() against size())
    constexpr T& operator[](size_t idx) { return this->m_storage.at(idx); }
    constexpr const T& operator[](size_t idx) const { return this->m_storage.at(idx); }
    [[nodiscard]] constexpr T& at(size_t idx) {
        if (idx >= this->m_count) throw std::out_of_range("Index out of range");
        return this->m_storage.at(idx);
    }
    [[nodiscard]] constexpr const T& at(size_t idx) const {
        if (idx >= this->m_count) throw std::out_of_range("Index out of range");
        return this->m_storage.at(idx);
    }
    [[nodiscard]] constexpr T& front() { return this->m_storage.front(); }
    [[nodiscard]] constexpr const T& front() const { return this->m_storage.front(); }
    [[nodiscard]] constexpr T& back() { return this->m_storage.at(this->m_count - 1); }
    [[nodiscard]] constexpr const T& back() const { return this->m_storage.at(this->m_count - 1); }
    // Iterating elements
    [[nodiscard]] constexpr T* begin() { return this->m_storage.data(); }
    [[nodiscard]] constexpr const T* begin() const { return this->m_storage.data(); }
    [[nodiscard]] constexpr const T* cbegin() const { return this->m_storage.data(); }
    [[nodiscard]] constexpr T* end() { return this->m_storage.data() + this->m_count; } // NOLINT (pointer arithmetic)
    [[nodiscard]] constexpr const T* end() const { return this->m_storage.data() + this->m_count; } // NOLINT (pointer arithmetic)
    [[nodiscard]] constexpr const T* cend() const { return this->m_storage.data() + this->m_count; } // NOLINT (pointer arithmetic)
    // Removing elements
    constexpr void pop_back() {
        if (this->m_count == 0) throw std::out_of_range("Vector is empty");
        --this->m_count;
    }
    // Query capacity
    [[nodiscard]] constexpr size_t size() const { return this->m_count; }
    [[nodiscard]] constexpr size_t capacity() const { return N; }
    [[nodiscard]] constexpr bool empty() const { return this->m_count == 0; }
private:
    // Reject growth past the inline capacity
    constexpr void ensureSpace() const {
        if (this->m_count >= N) throw std::bad_alloc();
    }
    std::array<T, N> m_storage{};
    size_t m_count{0};
#pragma clang diagnostic pop
};
/// Sequence of operations to apply to the base extent
///
/// Each operation is an (add, shift) pair; the flow flag selects whether the
/// chain starts from the flow-scaled base extent.
class ExtentOp {
public:
    /// Default constructor for no operations and no flow scaling
    constexpr ExtentOp() = default;
    /// Constructor for no operations aside from flow scale
    constexpr ExtentOp(bool flow) : m_flow(flow) {}
    /// Constructor for a single operation
    constexpr ExtentOp(bool flow, uint32_t add, uint32_t shift)
        : m_flow(flow), m_operations({{add, shift}}) {}
    /// Constructor for a single operation starting from the flow base extent
    constexpr ExtentOp(uint32_t add, uint32_t shift)
        : m_flow(true), m_operations({{add, shift}}) {}
    /// Combine two extents: appends the other chain's operations.
    /// Note: the left-hand side's flow flag is kept; other.m_flow is ignored.
    constexpr ExtentOp operator+(const ExtentOp& other) const {
        ExtentOp result{*this};
        for (const auto& [add, shift] : other.m_operations)
            result.m_operations.emplace_back(add, shift);
        return result;
    }
    /// Append the other chain's operations in place.
    /// Fixed to return *this by reference (previously returned a copy by
    /// value, which is non-idiomatic and silently copies the whole object).
    constexpr ExtentOp& operator+=(const ExtentOp& other) {
        for (const auto& [add, shift] : other.m_operations)
            this->m_operations.emplace_back(add, shift);
        return *this;
    }
    /// Get the flow value
    [[nodiscard]] constexpr auto flow() const { return this->m_flow; }
    /// Get the operations
    [[nodiscard]] constexpr const auto& operations() const { return this->m_operations; }
private:
    bool m_flow{false};
    inplace_vector<std::pair<uint32_t, uint32_t>, 8> m_operations;
};
}

View file

@ -0,0 +1,95 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include "helpers.hpp"
#include <cstddef>
#include <cstdint>
#include <utility>
namespace lsfgvk::pipeline {
/// All supported image formats
///
/// The numeric values intentionally mirror the corresponding VkFormat
/// constants, so they can be cast directly to vk::Format.
enum class Format : char {
    /// Invalid format
    Invalid = 0,
    /// 8-bit unsigned normalized RGBA format
    RGBA8888 = 37, // VK_FORMAT_R8G8B8A8_UNORM
    /// 8-bit unsigned normalized R format
    R8 = 9, // VK_FORMAT_R8_UNORM
    /// 16-bit signed floating point RGBA format
    RGBA16161616 = 97, // VK_FORMAT_R16G16B16A16_SFLOAT
};
/// All supported image flags
enum class ImageFlag : char {
    /// No special flags
    None = 0,
    /// Instead of using a single image array, create several individual images with halving
    /// extends for each mip level.
    ///
    /// This will cause the image to show up as Texture2D[], rather than Texture2DArray
    /// and must therefore not be used in full with passes where the "Aggregate" flag is set.
    Mipmaps = 1 << 0,
    /// Indicate that the image is pinned & not transient
    Pinned = 1 << 1,
    /// Indicate that this image is written to externally
    ExternalInput = 1 << 2,
    /// Indicate that this image is read from externally
    ExternalOutput = 1 << 3,
    /// Indicate that a separate format should be used for HDR
    HdrVariant = 1 << 4
};
/// Helper type for operating on image flags
///
/// Thin constexpr wrapper around an int bitmask of ImageFlag values.
class ImageFlags {
public:
    /// Default constructor, no flags set
    constexpr ImageFlags() = default;
    /// Wrap a single image flag
    constexpr ImageFlags(ImageFlag flag) : m_bits(static_cast<int>(flag)) {}
    /// True when at least one flag is set
    constexpr operator bool() const { return this->m_bits != 0; }
    /// Produce the union with another flag
    constexpr ImageFlags operator|(ImageFlag flag) const {
        const int combined = this->m_bits | static_cast<int>(flag);
        return ImageFlags{combined};
    }
    /// Produce the intersection with another flag
    constexpr ImageFlags operator&(ImageFlag flag) const {
        const int masked = this->m_bits & static_cast<int>(flag);
        return ImageFlags{masked};
    }
    /// Produce the intersection with another flag set
    constexpr ImageFlags operator&(ImageFlags other) const {
        const int masked = this->m_bits & other.m_bits;
        return ImageFlags{masked};
    }
private:
    int m_bits{static_cast<int>(ImageFlag::None)};
    // Wrap a raw bitmask
    constexpr ImageFlags(int flags) : m_bits(flags) {}
};
/// Combine two image flags into one flag set
constexpr ImageFlags operator|(ImageFlag lhs, ImageFlag rhs) {
    return ImageFlags{lhs} | rhs;
}
/// Signature for an image
struct ImageSignature {
    /// Format of the image
    Format format{ Format::RGBA8888 };
    /// Optional second format, used instead of `format` for HDR variants
    /// (see ImageFlag::HdrVariant)
    Format hdrFormat{ Format::RGBA16161616 };
    /// Optional flags for the image
    ImageFlags flags{ ImageFlag::None };
    /// Operation applied to the base extent for calculating the image extent
    ExtentOp extentOp;
    /// Amount of layers in the image
    uint32_t count{1};
    /// Lifetime of the image: (first writing stage, last reading stage),
    /// set by the pipeline signature builder
    std::pair<size_t, size_t> lifetime;
};
}

View file

@ -0,0 +1,88 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include "helpers.hpp"
#include <cstddef>
#include <cstdint>
#include <optional>
#include <string_view>
namespace lsfgvk::pipeline {
/// All supported pass flags
enum class PassFlag : char {
    /// No special flags
    None = 0,
    /// Indicates the shader will be reused several times and resources must be
    /// aggregated into arrays and indexed via push constants.
    Aggregate = 1 << 0,
    /// Indicate that the special flag is set via push constant.
    Special = 1 << 1,
    /// Indicate that there are two variants for 8-bit and 16-bit formats
    HdrVariant = 1 << 2
};
/// Helper type for operating on pass flags
///
/// Thin constexpr wrapper around an int bitmask of PassFlag values.
class PassFlags {
public:
    /// Default constructor, no flags set
    constexpr PassFlags() = default;
    /// Wrap a single pass flag
    constexpr PassFlags(PassFlag flag) : m_bits(static_cast<int>(flag)) {}
    /// True when at least one flag is set
    constexpr operator bool() const { return this->m_bits != 0; }
    /// Produce the union with another flag
    constexpr PassFlags operator|(PassFlag flag) const {
        const int combined = this->m_bits | static_cast<int>(flag);
        return PassFlags{combined};
    }
    /// Produce the intersection with another flag
    constexpr PassFlags operator&(PassFlag flag) const {
        const int masked = this->m_bits & static_cast<int>(flag);
        return PassFlags{masked};
    }
private:
    int m_bits{static_cast<int>(PassFlag::None)};
    // Wrap a raw bitmask
    constexpr PassFlags(int flags) : m_bits(flags) {}
};
/// Combine two pass flags into one flag set
constexpr PassFlags operator|(PassFlag lhs, PassFlag rhs) {
    return PassFlags{lhs} | rhs;
}
/// A pointer to an image, or a specific layer inside that image
class Resource {
public:
    /// Default constructor: refers to no image at all.
    ///
    /// The signature builder skips resources where idx() is empty, so a
    /// default-constructed Resource acts as an empty placeholder slot.
    constexpr Resource() = default;
    /// Constructor for a full image
    constexpr Resource(size_t idx) : m_idx(idx) {}
    /// Constructor for a single layer
    constexpr Resource(size_t idx, uint32_t layer) : m_idx(idx), m_layer(layer) {}
    /// Get the image index, if any
    [[nodiscard]] constexpr auto idx() const { return this->m_idx; }
    /// Get the layer inside the image, if any
    [[nodiscard]] constexpr auto layer() const { return this->m_layer; }
private:
    // Empty by default. Previously initialized as {0}, which made a
    // default-constructed Resource silently alias image 0 — contradicting
    // the builder's `!resource.idx()` placeholder checks.
    std::optional<size_t> m_idx;
    std::optional<uint32_t> m_layer;
};
/// Signature of a shader pass
struct PassSignature {
    /// Name of the shader
    std::string_view shader;
    /// Optional flags of this pass
    PassFlags flags{ PassFlag::None };
    /// Resources to read from (empty Resources act as placeholder slots)
    inplace_vector<Resource, 8> inputs;
    /// Resources to write to (slot position determines the binding index)
    inplace_vector<Resource, 8> outputs;
    /// Operation applied to the base extent for calculating the dispatch extent
    ExtentOp dispatchOp;
};
}

View file

@ -1,73 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "alpha0.hpp"
#include "../helpers/utils.hpp"
#include "lsfg-vk-common/helpers/pointers.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <cstddef>
#include <vector>
#include <vulkan/vulkan_core.h>
using namespace lsfgvk::backend;
// create the pre-alpha shaderchain: three chained passes that take the
// source image down to quarter resolution intermediates
Alpha0::Alpha0(const Ctx& ctx,
        const vk::Image& sourceImage) {
    const size_t m = ctx.perf ? 1 : 2; // multiplier (performance mode halves the image count)
    // NOTE(review): add_shift_extent(e, 1, 1) presumably computes (e + 1) >> 1
    // per dimension (rounded-up halving) — confirm against its definition.
    const VkExtent2D halfExtent = backend::add_shift_extent(sourceImage.getExtent(), 1, 1);
    const VkExtent2D quarterExtent = backend::add_shift_extent(halfExtent, 1, 1);
    // create temporary & output images
    this->tempImages0.reserve(m);
    this->tempImages1.reserve(m);
    for (size_t i = 0; i < m; i++) {
        this->tempImages0.emplace_back(ctx.vk, halfExtent);
        this->tempImages1.emplace_back(ctx.vk, halfExtent);
    }
    this->images.reserve(2 * m);
    for (size_t i = 0; i < (2 * m); i++)
        this->images.emplace_back(ctx.vk, quarterExtent);
    // create descriptor sets: source -> temp0 -> temp1 -> images,
    // using alpha shaders 0..2 in order
    const auto& shaders = ctx.perf ? ctx.shaders.get().performance : ctx.shaders.get().quality;
    this->sets.reserve(3);
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampled(sourceImage)
        .storages(this->tempImages0)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.alpha.at(0)));
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages0)
        .storages(this->tempImages1)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.alpha.at(1)));
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages1)
        .storages(this->images)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.alpha.at(2)));
    // store dispatch extents
    // NOTE(review): (e + 7, >> 3) looks like rounding up to 8-wide
    // workgroups — confirm against the shader's local size.
    this->dispatchExtent0 = backend::add_shift_extent(halfExtent, 7, 3);
    this->dispatchExtent1 = backend::add_shift_extent(quarterExtent, 7, 3);
}
// hand every internally created image handle to the caller,
// keeping the temp0/temp1 pairs interleaved in creation order
void Alpha0::prepare(std::vector<VkImage>& images) const {
    const size_t pairCount = this->tempImages0.size();
    for (size_t idx = 0; idx < pairCount; idx++) {
        images.push_back(this->tempImages0.at(idx).handle());
        images.push_back(this->tempImages1.at(idx).handle());
    }
    for (const auto& output : this->images)
        images.push_back(output.handle());
}
// dispatch the three passes in order: two at the half-resolution
// dispatch extent, the last one at the quarter-resolution extent
void Alpha0::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const {
    const auto& passes = this->sets;
    passes.at(0).dispatch(vk, cmd, this->dispatchExtent0);
    passes.at(1).dispatch(vk, cmd, this->dispatchExtent0);
    passes.at(2).dispatch(vk, cmd, this->dispatchExtent1);
}
}

View file

@ -1,48 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include "../helpers/managed_shader.hpp"
#include "../helpers/utils.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <vector>
#include <vulkan/vulkan_core.h>
namespace ctx { struct Ctx; }
namespace lsfgvk::backend {
/// pre-alpha shaderchain
class Alpha0 {
public:
    /// create a pre-alpha shaderchain
    /// @param ctx context
    /// @param sourceImage source image
    Alpha0(const Ctx& ctx,
        const vk::Image& sourceImage);
    /// prepare the shaderchain initially
    /// @param images vector to fill with image handles
    void prepare(std::vector<VkImage>& images) const;
    /// render the pre-alpha shaderchain
    /// @param vk the vulkan instance
    /// @param cmd command buffer
    void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const;
    /// get the generated images
    /// @return vector of images
    [[nodiscard]] const auto& getImages() const { return this->images; }
private:
    std::vector<vk::Image> tempImages0; // half-resolution intermediates
    std::vector<vk::Image> tempImages1; // half-resolution intermediates
    std::vector<vk::Image> images;      // quarter-resolution outputs
    std::vector<ManagedShader> sets;    // one descriptor set per pass
    VkExtent2D dispatchExtent0{};       // dispatch extent for passes 0 & 1
    VkExtent2D dispatchExtent1{};       // dispatch extent for pass 2
};
}

View file

@ -1,54 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "alpha1.hpp"
#include "../helpers/utils.hpp"
#include "lsfg-vk-common/helpers/pointers.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <cstddef>
#include <vector>
#include <vulkan/vulkan_core.h>
using namespace lsfgvk::backend;
// create the alpha shaderchain: `temporal` descriptor sets, each writing
// into its own set of output images (selected round-robin at render time)
Alpha1::Alpha1(const Ctx& ctx, size_t temporal,
        const std::vector<vk::Image>& sourceImages) {
    const size_t m = ctx.perf ? 1 : 2; // multiplier (performance mode halves the image count)
    const VkExtent2D quarterExtent = sourceImages.at(0).getExtent();
    // create output images for mod3
    this->images.reserve(temporal);
    for(size_t i = 0; i < temporal; i++) {
        auto& vec = this->images.emplace_back();
        vec.reserve(2 * m);
        for (size_t j = 0; j < (2 * m); j++)
            vec.emplace_back(ctx.vk, quarterExtent);
    }
    // create descriptor sets, one per temporal slot, all sampling the
    // same source images but writing to that slot's outputs
    const auto& shaders = ctx.perf ? ctx.shaders.get().performance : ctx.shaders.get().quality;
    this->sets.reserve(temporal);
    for (size_t i = 0; i < temporal; i++)
        this->sets.emplace_back(ManagedShaderBuilder()
            .sampleds(sourceImages)
            .storages(this->images.at(i))
            .sampler(ctx.bnbSampler)
            .build(ctx.vk, ctx.pool, shaders.alpha.at(3)));
    // store dispatch extents
    // NOTE(review): (e + 7, >> 3) looks like rounding up to 8-wide
    // workgroups — confirm against the shader's local size.
    this->dispatchExtent = backend::add_shift_extent(quarterExtent, 7, 3);
}
// hand every generated image handle (across all temporal slots) to the caller
void Alpha1::prepare(std::vector<VkImage>& images) const {
    for (const auto& slotImages : this->images) {
        for (const auto& image : slotImages)
            images.push_back(image.handle());
    }
}
// dispatch the descriptor set for the frame's temporal slot (round-robin)
void Alpha1::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const {
    const size_t slot = idx % this->sets.size();
    this->sets.at(slot).dispatch(vk, cmd, this->dispatchExtent);
}

View file

@ -1,47 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include "../helpers/managed_shader.hpp"
#include "../helpers/utils.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <vector>
#include <vulkan/vulkan_core.h>
namespace ctx { struct Ctx; }
namespace lsfgvk::backend {
/// alpha shaderchain
class Alpha1 {
public:
    /// create a alpha shaderchain
    /// @param ctx context
    /// @param temporal temporal count
    /// @param sourceImages source images
    Alpha1(const Ctx& ctx, size_t temporal,
        const std::vector<vk::Image>& sourceImages);
    /// prepare the shaderchain initially
    /// @param images vector to fill with image handles
    void prepare(std::vector<VkImage>& images) const;
    /// render the alpha shaderchain
    /// @param vk the vulkan instance
    /// @param cmd command buffer
    /// @param idx frame index (selects the temporal slot round-robin)
    void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const;
    /// get the generated images
    /// @return vector of images
    [[nodiscard]] const auto& getImages() const { return this->images; }
private:
    std::vector<std::vector<vk::Image>> images; // one image set per temporal slot
    std::vector<ManagedShader> sets;            // one descriptor set per temporal slot
    VkExtent2D dispatchExtent{};
};
}

View file

@ -1,50 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "beta0.hpp"
#include "../helpers/utils.hpp"
#include "lsfg-vk-common/helpers/pointers.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <cstddef>
#include <vector>
#include <vulkan/vulkan_core.h>
using namespace lsfgvk::backend;
/// Build the beta0 shaderchain: two output images plus one descriptor
/// set per temporal frame. Each set samples the two preceding frames
/// and the current one from the temporal ring.
Beta0::Beta0(const Ctx& ctx,
        const std::vector<std::vector<vk::Image>>& sourceImages) {
    const VkExtent2D extent = sourceImages.at(0).at(0).getExtent();

    // two output images at the source extent
    this->images.reserve(2);
    for (size_t n = 0; n < 2; n++)
        this->images.emplace_back(ctx.vk, extent);

    // pick the shader variant matching the performance/quality setting
    const auto& bundle = ctx.perf ?
        ctx.shaders.get().performance : ctx.shaders.get().quality;
    const auto& shader = bundle.beta.at(0);

    // one descriptor set per temporal frame; indices wrap around the ring
    const size_t count = sourceImages.size();
    this->sets.reserve(count);
    for (size_t frame = 0; frame < count; frame++) {
        const size_t prev2 = (frame + (count - 2)) % count;
        const size_t prev1 = (frame + (count - 1)) % count;
        this->sets.emplace_back(ManagedShaderBuilder()
            .sampleds(sourceImages.at(prev2))
            .sampleds(sourceImages.at(prev1))
            .sampleds(sourceImages.at(frame % count))
            .storages(this->images)
            .sampler(ctx.bnwSampler)
            .build(ctx.vk, ctx.pool, shader));
    }

    // store dispatch extents
    this->dispatchExtent = backend::add_shift_extent(extent, 7, 3);
}
/// Append the raw handle of each owned output image.
void Beta0::prepare(std::vector<VkImage>& images) const {
    const size_t count = this->images.size();
    for (size_t i = 0; i < count; i++)
        images.push_back(this->images.at(i).handle());
}
/// Dispatch the descriptor set for this frame, wrapping on the ring size.
void Beta0::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const {
    const size_t which = idx % this->sets.size();
    this->sets.at(which).dispatch(vk, cmd, this->dispatchExtent);
}

View file

@ -1,46 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include "../helpers/managed_shader.hpp"
#include "../helpers/utils.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <vector>
#include <vulkan/vulkan_core.h>
namespace ctx { struct Ctx; }
namespace lsfgvk::backend {

    /// Beta shaderchain (first stage): one descriptor set per temporal
    /// frame writing into a shared pair of output images.
    class Beta0 {
    public:
        /// Create a beta shaderchain.
        /// @param ctx context (device, pool, shaders, samplers)
        /// @param sourceImages per-frame groups of source images
        Beta0(const Ctx& ctx,
            const std::vector<std::vector<vk::Image>>& sourceImages);

        /// Prepare the shaderchain initially.
        /// @param images vector to fill with the handles of the owned images
        void prepare(std::vector<VkImage>& images) const;

        /// Render the beta shaderchain.
        /// @param vk vulkan instance
        /// @param cmd command buffer
        /// @param idx frame index; wraps around the set count
        void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const;

        /// Get the generated images.
        /// @return vector of images
        [[nodiscard]] const auto& getImages() const { return this->images; }

    private:
        std::vector<vk::Image> images;   // two shared output images
        std::vector<ManagedShader> sets; // one descriptor set per frame
        VkExtent2D dispatchExtent{};     // cached compute dispatch size
    };

}

View file

@ -1,81 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "beta1.hpp"
#include "../helpers/utils.hpp"
#include "lsfg-vk-common/helpers/pointers.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <cstddef>
#include <cstdint>
#include <vector>
#include <vulkan/vulkan_core.h>
using namespace lsfgvk::backend;
/// Build the beta1 shaderchain: three ping-pong passes over two pairs of
/// temporary images (beta shaders 1-3), then a final pass (beta shader 4)
/// that writes six single-channel images of decreasing extent.
Beta1::Beta1(const Ctx& ctx,
        const std::vector<vk::Image>& sourceImages) {
    const VkExtent2D extent = sourceImages.at(0).getExtent();

    // create temporary & output images
    this->tempImages0.reserve(2);
    this->tempImages1.reserve(2);
    for(uint32_t i = 0; i < 2; i++) {
        this->tempImages0.emplace_back(ctx.vk, extent);
        this->tempImages1.emplace_back(ctx.vk, extent);
    }
    // six R8 outputs, extent shifted down by the index (see shift_extent)
    this->images.reserve(6);
    for (uint32_t i = 0; i < 6; i++)
        this->images.emplace_back(ctx.vk,
            backend::shift_extent(extent, i),
            VK_FORMAT_R8_UNORM);

    // create descriptor sets, using the performance/quality shader variant
    const auto& shaders = (ctx.perf ?
        ctx.shaders.get().performance : ctx.shaders.get().quality).beta;
    this->sets.reserve(4);
    // pass 1: sourceImages -> tempImages0
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(sourceImages)
        .storages(this->tempImages0)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.at(1)));
    // pass 2: tempImages0 -> tempImages1
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages0)
        .storages(this->tempImages1)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.at(2)));
    // pass 3: tempImages1 -> tempImages0
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages1)
        .storages(this->tempImages0)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.at(3)));
    // pass 4: tempImages0 -> images, with access to the constant buffer
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages0)
        .storages(this->images)
        .sampler(ctx.bnbSampler)
        .buffer(ctx.constantBuffer)
        .build(ctx.vk, ctx.pool, shaders.at(4)));

    // store dispatch extents: passes 1-3 use extent0, pass 4 uses extent1
    this->dispatchExtent0 = backend::add_shift_extent(extent, 7, 3);
    this->dispatchExtent1 = backend::add_shift_extent(extent, 31, 5);
}
void Beta1::prepare(std::vector<VkImage>& images) const {
for (size_t i = 0; i < 2; i++) {
images.push_back(this->tempImages0.at(i).handle());
images.push_back(this->tempImages1.at(i).handle());
}
for (const auto& img : this->images)
images.push_back(img.handle());
}
/// Record all four passes; the first three share dispatchExtent0,
/// the final pass uses dispatchExtent1.
void Beta1::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const {
    for (size_t pass = 0; pass < 3; pass++)
        this->sets.at(pass).dispatch(vk, cmd, this->dispatchExtent0);
    this->sets.at(3).dispatch(vk, cmd, this->dispatchExtent1);
}

View file

@ -1,48 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include "../helpers/managed_shader.hpp"
#include "../helpers/utils.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <vector>
#include <vulkan/vulkan_core.h>
namespace ctx { struct Ctx; }
namespace lsfgvk::backend {

    /// Beta shaderchain (second stage): fixed four-pass chain over two
    /// ping-pong buffer pairs, producing a set of R8 images.
    class Beta1 {
    public:
        /// Create a beta shaderchain.
        /// @param ctx context (device, pool, shaders, samplers)
        /// @param sourceImages source images sampled by the first pass
        Beta1(const Ctx& ctx,
            const std::vector<vk::Image>& sourceImages);

        /// Prepare the shaderchain initially.
        /// @param images vector to fill with the handles of all owned images
        void prepare(std::vector<VkImage>& images) const;

        /// Render the beta shaderchain (all four passes).
        /// @param vk the vulkan instance
        /// @param cmd command buffer
        void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const;

        /// Get the generated images.
        /// @return vector of images
        [[nodiscard]] const auto& getImages() const { return this->images; }

    private:
        std::vector<vk::Image> tempImages0; // ping-pong buffer A
        std::vector<vk::Image> tempImages1; // ping-pong buffer B
        std::vector<vk::Image> images;      // final output images
        std::vector<ManagedShader> sets;    // the four pass descriptor sets
        VkExtent2D dispatchExtent0{};       // dispatch size for passes 1-3
        VkExtent2D dispatchExtent1{};       // dispatch size for pass 4
    };

}

View file

@ -1,75 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "delta0.hpp"
#include "../helpers/utils.hpp"
#include "lsfg-vk-common/helpers/pointers.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <cstddef>
#include <vector>
#include <vulkan/vulkan_core.h>
using namespace lsfgvk::backend;
/// Build the delta0 shaderchain: two parallel per-temporal-frame passes
/// (delta shaders 0 and 5), each sampling the previous and the current
/// frame group plus one extra input image.
Delta0::Delta0(const Ctx& ctx, size_t idx,
        const std::vector<std::vector<vk::Image>>& sourceImages,
        const vk::Image& additionalInput0,
        const vk::Image& additionalInput1) {
    const size_t m = ctx.perf ? 1 : 2; // multiplier
    const VkExtent2D extent = sourceImages.at(0).at(0).getExtent();

    // create output images: three for the first pass, m for the second
    this->images0.reserve(3);
    for(size_t i = 0; i < 3; i++)
        this->images0.emplace_back(ctx.vk, extent);
    this->images1.reserve(m);
    for (size_t i = 0; i < m; i++)
        this->images1.emplace_back(ctx.vk, extent);

    // create descriptor sets, using the performance/quality shader variant
    const auto& shaders = (ctx.perf ?
        ctx.shaders.get().performance : ctx.shaders.get().quality).delta;
    // pass A (shader 0): previous frame + current frame + input0 -> images0
    this->sets0.reserve(sourceImages.size());
    for (size_t i = 0; i < sourceImages.size(); i++)
        this->sets0.emplace_back(ManagedShaderBuilder()
            .sampleds(sourceImages.at((i + (sourceImages.size() - 1)) % sourceImages.size()))
            .sampleds(sourceImages.at(i % sourceImages.size()))
            .sampled(additionalInput0)
            .storages(this->images0)
            .sampler(ctx.bnwSampler)
            .sampler(ctx.eabSampler)
            .buffer(ctx.constantBuffers.at(idx))
            .build(ctx.vk, ctx.pool, shaders.at(0)));
    // pass B (shader 5): same frames + input1 + input0 -> images1
    this->sets1.reserve(sourceImages.size());
    for (size_t i = 0; i < sourceImages.size(); i++)
        this->sets1.emplace_back(ManagedShaderBuilder()
            .sampleds(sourceImages.at((i + (sourceImages.size() - 1)) % sourceImages.size()))
            .sampleds(sourceImages.at(i % sourceImages.size()))
            .sampled(additionalInput1)
            .sampled(additionalInput0)
            .storages(this->images1)
            .sampler(ctx.bnwSampler)
            .sampler(ctx.eabSampler)
            .buffer(ctx.constantBuffers.at(idx))
            .build(ctx.vk, ctx.pool, shaders.at(5)));

    // store dispatch extents
    this->dispatchExtent = backend::add_shift_extent(extent, 7, 3);
}
void Delta0::prepare(std::vector<VkImage>& images) const {
for (const auto& img : this->images0)
images.push_back(img.handle());
for (const auto& img : this->images1)
images.push_back(img.handle());
}
/// Dispatch one set from each pass group; both wrap on the frame index.
void Delta0::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const {
    const size_t i0 = idx % this->sets0.size();
    const size_t i1 = idx % this->sets1.size();
    this->sets0.at(i0).dispatch(vk, cmd, this->dispatchExtent);
    this->sets1.at(i1).dispatch(vk, cmd, this->dispatchExtent);
}

View file

@ -1,57 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include "../helpers/managed_shader.hpp"
#include "../helpers/utils.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <vector>
#include <vulkan/vulkan_core.h>
namespace ctx { struct Ctx; }
namespace lsfgvk::backend {

    /// Delta shaderchain (first stage): two parallel per-frame passes
    /// producing two independent groups of output images.
    class Delta0 {
    public:
        /// Create a delta shaderchain.
        /// @param ctx context (device, pool, shaders, samplers, buffers)
        /// @param idx generated frame index (selects the constant buffer)
        /// @param sourceImages per-frame groups of source images
        /// @param additionalInput0 additional input image (used by both passes)
        /// @param additionalInput1 additional input image (second pass only)
        Delta0(const Ctx& ctx, size_t idx,
            const std::vector<std::vector<vk::Image>>& sourceImages,
            const vk::Image& additionalInput0,
            const vk::Image& additionalInput1);

        /// Prepare the shaderchain initially.
        /// @param images vector to fill with the handles of all owned images
        void prepare(std::vector<VkImage>& images) const;

        /// Render the delta shaderchain.
        /// @param vk the vulkan instance
        /// @param cmd command buffer
        /// @param idx frame index; wraps around the set counts
        void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const;

        /// Get the generated images of the first pass.
        /// @return vector of images
        [[nodiscard]] const auto& getImages0() const { return this->images0; }

        /// Get the generated images of the second pass.
        /// @return vector of images
        [[nodiscard]] const auto& getImages1() const { return this->images1; }

    private:
        std::vector<vk::Image> images0;   // first-pass outputs (3 images)
        std::vector<vk::Image> images1;   // second-pass outputs
        std::vector<ManagedShader> sets0; // first-pass sets, one per frame
        std::vector<ManagedShader> sets1; // second-pass sets, one per frame
        VkExtent2D dispatchExtent{};      // shared compute dispatch size
    };

}

View file

@ -1,110 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "delta1.hpp"
#include "../helpers/utils.hpp"
#include "lsfg-vk-common/helpers/pointers.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <cstddef>
#include <vector>
#include <vulkan/vulkan_core.h>
using namespace lsfgvk::backend;
/// Build the delta1 shaderchain: two four-pass chains over shared
/// ping-pong buffers (delta shaders 1-4 and 6-9), each resolving into a
/// single RGBA16F output image.
/// @param ctx context (device, pool, shaders, samplers, buffers)
/// @param idx generated frame index (selects the constant buffer)
/// @param sourceImages0 source images for the first chain
/// @param sourceImages1 source images for the second chain
/// @param additionalInput0 extra input for the first chain's final pass
/// @param additionalInput1 extra input for the first chain's final pass
/// @param additionalInput2 extra input for the second chain's final pass
Delta1::Delta1(const Ctx& ctx, size_t idx,
        const std::vector<vk::Image>& sourceImages0,
        const std::vector<vk::Image>& sourceImages1,
        const vk::Image& additionalInput0,
        const vk::Image& additionalInput1,
        const vk::Image& additionalInput2) {
    const size_t m = ctx.perf ? 1 : 2; // multiplier: quality mode doubles counts
    const VkExtent2D extent = sourceImages0.at(0).getExtent();

    // create temporary & output images
    // (reserve up front to avoid reallocations while emplacing,
    //  consistent with the other shaderchain constructors)
    this->tempImages0.reserve(2 * m);
    this->tempImages1.reserve(2 * m);
    for (size_t i = 0; i < (2 * m); i++) {
        this->tempImages0.emplace_back(ctx.vk, extent);
        this->tempImages1.emplace_back(ctx.vk, extent);
    }
    this->image0.emplace(ctx.vk, extent, VK_FORMAT_R16G16B16A16_SFLOAT);
    this->image1.emplace(ctx.vk, extent, VK_FORMAT_R16G16B16A16_SFLOAT);

    // create descriptor sets, using the performance/quality shader variant
    const auto& shaders = (ctx.perf ?
        ctx.shaders.get().performance : ctx.shaders.get().quality).delta;
    this->sets.reserve(4 + 4);
    // chain A, pass 1: sourceImages0 -> tempImages0
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(sourceImages0)
        .storages(this->tempImages0)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.at(1)));
    // chain A, pass 2: tempImages0 -> tempImages1
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages0)
        .storages(this->tempImages1)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.at(2)));
    // chain A, pass 3: tempImages1 -> tempImages0
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages1)
        .storages(this->tempImages0)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.at(3)));
    // chain A, pass 4: tempImages0 + inputs 0/1 -> image0
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages0)
        .sampled(additionalInput0)
        .sampled(additionalInput1)
        .storage(*this->image0)
        .sampler(ctx.bnbSampler)
        .sampler(ctx.eabSampler)
        .buffer(ctx.constantBuffers.at(idx))
        .build(ctx.vk, ctx.pool, shaders.at(4)));
    // chain B reuses the first m images of each ping-pong buffer
    // chain B, pass 1: sourceImages1 -> tempImages0[0..m)
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(sourceImages1)
        .storages(this->tempImages0, 0, m)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.at(6)));
    // chain B, pass 2: tempImages0[0..m) -> tempImages1[0..m)
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages0, 0, m)
        .storages(this->tempImages1, 0, m)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.at(7)));
    // chain B, pass 3: tempImages1[0..m) -> tempImages0[0..m)
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages1, 0, m)
        .storages(this->tempImages0, 0, m)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.at(8)));
    // chain B, pass 4: tempImages0[0..m) + input2 -> image1
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages0, 0, m)
        .sampled(additionalInput2)
        .storage(*this->image1)
        .sampler(ctx.bnbSampler)
        .sampler(ctx.eabSampler)
        .buffer(ctx.constantBuffers.at(idx))
        .build(ctx.vk, ctx.pool, shaders.at(9)));

    // store dispatch extents (shared by all passes)
    this->dispatchExtent = backend::add_shift_extent(extent, 7, 3);
}
void Delta1::prepare(std::vector<VkImage>& images) const {
for (size_t i = 0; i < this->tempImages0.size(); i++) {
images.push_back(this->tempImages0.at(i).handle());
images.push_back(this->tempImages1.at(i).handle());
}
images.push_back(this->image0->handle());
images.push_back(this->image1->handle());
}
/// Record every pass in creation order; all share one dispatch extent.
void Delta1::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const {
    for (size_t pass = 0; pass < this->sets.size(); pass++)
        this->sets.at(pass).dispatch(vk, cmd, this->dispatchExtent);
}

View file

@ -1,62 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include "../helpers/managed_shader.hpp"
#include "../helpers/utils.hpp"
#include "lsfg-vk-common/helpers/pointers.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <vector>
#include <vulkan/vulkan_core.h>
namespace ctx { struct Ctx; }
namespace lsfgvk::backend {

    /// Delta shaderchain (second stage): two fixed four-pass chains over
    /// shared ping-pong buffers, producing two RGBA16F images.
    /// (Original comments said "gamma"; this class belongs to the delta stage.)
    class Delta1 {
    public:
        /// Create a delta shaderchain.
        /// @param ctx context (device, pool, shaders, samplers, buffers)
        /// @param idx generated frame index (selects the constant buffer)
        /// @param sourceImages0 source images for the first chain
        /// @param sourceImages1 source images for the second chain
        /// @param additionalInput0 additional input image (first chain)
        /// @param additionalInput1 additional input image (first chain)
        /// @param additionalInput2 additional input image (second chain)
        Delta1(const Ctx& ctx, size_t idx,
            const std::vector<vk::Image>& sourceImages0,
            const std::vector<vk::Image>& sourceImages1,
            const vk::Image& additionalInput0,
            const vk::Image& additionalInput1,
            const vk::Image& additionalInput2);

        /// Prepare the shaderchain initially.
        /// @param images vector to fill with the handles of all owned images
        void prepare(std::vector<VkImage>& images) const;

        /// Render the delta shaderchain (all passes).
        /// @param vk the vulkan instance
        /// @param cmd command buffer
        void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const;

        /// Get the first generated image.
        /// @return image
        [[nodiscard]] const auto& getImage0() const { return *this->image0; }

        /// Get the second generated image.
        /// @return image
        [[nodiscard]] const auto& getImage1() const { return *this->image1; }

    private:
        std::vector<vk::Image> tempImages0; // ping-pong buffer A
        std::vector<vk::Image> tempImages1; // ping-pong buffer B
        ls::lazy<vk::Image> image0;         // first chain's output
        ls::lazy<vk::Image> image1;         // second chain's output
        std::vector<ManagedShader> sets;    // all eight pass descriptor sets
        VkExtent2D dispatchExtent{};        // shared compute dispatch size
    };

}

View file

@ -1,53 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "gamma0.hpp"
#include "../helpers/utils.hpp"
#include "lsfg-vk-common/helpers/pointers.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <cstddef>
#include <vector>
#include <vulkan/vulkan_core.h>
using namespace lsfgvk::backend;
/// Build the gamma0 shaderchain: one descriptor set per temporal frame
/// (gamma shader 0), each sampling the previous and current frame group
/// plus one extra input, writing into three shared output images.
Gamma0::Gamma0(const Ctx& ctx, size_t idx,
        const std::vector<std::vector<vk::Image>>& sourceImages,
        const vk::Image& additionalInput) {
    const VkExtent2D extent = sourceImages.at(0).at(0).getExtent();

    // create output images
    this->images.reserve(3);
    for(size_t i = 0; i < 3; i++)
        this->images.emplace_back(ctx.vk, extent);

    // create descriptor sets, using the performance/quality shader variant
    const auto& shader = (ctx.perf ?
        ctx.shaders.get().performance : ctx.shaders.get().quality).gamma.at(0);
    this->sets.reserve(sourceImages.size());
    // each set samples the previous frame group and the current one
    for (size_t i = 0; i < sourceImages.size(); i++)
        this->sets.emplace_back(ManagedShaderBuilder()
            .sampleds(sourceImages.at((i + (sourceImages.size() - 1)) % sourceImages.size()))
            .sampleds(sourceImages.at(i % sourceImages.size()))
            .sampled(additionalInput)
            .storages(this->images)
            .sampler(ctx.bnwSampler)
            .sampler(ctx.eabSampler)
            .buffer(ctx.constantBuffers.at(idx))
            .build(ctx.vk, ctx.pool, shader));

    // store dispatch extents
    this->dispatchExtent = backend::add_shift_extent(extent, 7, 3);
}
/// Append the raw handle of each generated image.
void Gamma0::prepare(std::vector<VkImage>& images) const {
    for (auto it = this->images.begin(); it != this->images.end(); ++it)
        images.push_back(it->handle());
}
/// Dispatch the descriptor set for this frame, wrapping on the set count.
void Gamma0::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const {
    const size_t setIdx = idx % this->sets.size();
    this->sets.at(setIdx).dispatch(vk, cmd, this->dispatchExtent);
}

View file

@ -1,49 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include "../helpers/managed_shader.hpp"
#include "../helpers/utils.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <vector>
#include <vulkan/vulkan_core.h>
namespace ctx { struct Ctx; }
namespace lsfgvk::backend {

    /// Gamma shaderchain (first stage): one descriptor set per temporal
    /// frame writing into three shared output images.
    class Gamma0 {
    public:
        /// Create a gamma shaderchain.
        /// @param ctx context (device, pool, shaders, samplers, buffers)
        /// @param idx generated frame index (selects the constant buffer)
        /// @param sourceImages per-frame groups of source images
        /// @param additionalInput additional input image sampled by every set
        Gamma0(const Ctx& ctx, size_t idx,
            const std::vector<std::vector<vk::Image>>& sourceImages,
            const vk::Image& additionalInput);

        /// Prepare the shaderchain initially.
        /// @param images vector to fill with the handles of the owned images
        void prepare(std::vector<VkImage>& images) const;

        /// Render the gamma shaderchain.
        /// @param vk the vulkan instance
        /// @param cmd command buffer
        /// @param idx frame index; wraps around the set count
        void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const;

        /// Get the generated images.
        /// @return vector of images
        [[nodiscard]] const auto& getImages() const { return this->images; }

    private:
        std::vector<vk::Image> images;   // three shared output images
        std::vector<ManagedShader> sets; // one descriptor set per frame
        VkExtent2D dispatchExtent{};     // cached compute dispatch size
    };

}

View file

@ -1,78 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "gamma1.hpp"
#include "../helpers/utils.hpp"
#include "lsfg-vk-common/helpers/pointers.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <cstddef>
#include <vector>
#include <vulkan/vulkan_core.h>
using namespace lsfgvk::backend;
/// Build the gamma1 shaderchain: a fixed four-pass chain over two
/// ping-pong buffers (gamma shaders 1-4), resolved into one RGBA16F image.
/// @param ctx context (device, pool, shaders, samplers, buffers)
/// @param idx generated frame index (selects the constant buffer)
/// @param sourceImages source images sampled by the first pass
/// @param additionalInput0 extra input for the final pass
/// @param additionalInput1 extra input for the final pass
Gamma1::Gamma1(const Ctx& ctx, size_t idx,
        const std::vector<vk::Image>& sourceImages,
        const vk::Image& additionalInput0,
        const vk::Image& additionalInput1) {
    const size_t m = ctx.perf ? 1 : 2; // multiplier: quality mode doubles counts
    const VkExtent2D extent = sourceImages.at(0).getExtent();

    // create temporary & output images
    // (reserve up front to avoid reallocations while emplacing,
    //  consistent with the other shaderchain constructors)
    this->tempImages0.reserve(2 * m);
    this->tempImages1.reserve(2 * m);
    for (size_t i = 0; i < (2 * m); i++) {
        this->tempImages0.emplace_back(ctx.vk, extent);
        this->tempImages1.emplace_back(ctx.vk, extent);
    }
    this->image.emplace(ctx.vk, extent, VK_FORMAT_R16G16B16A16_SFLOAT);

    // create descriptor sets, using the performance/quality shader variant
    const auto& shaders = (ctx.perf ?
        ctx.shaders.get().performance : ctx.shaders.get().quality).gamma;
    this->sets.reserve(4);
    // pass 1: sourceImages -> tempImages0
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(sourceImages)
        .storages(this->tempImages0)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.at(1)));
    // pass 2: tempImages0 -> tempImages1
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages0)
        .storages(this->tempImages1)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.at(2)));
    // pass 3: tempImages1 -> tempImages0
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages1)
        .storages(this->tempImages0)
        .sampler(ctx.bnbSampler)
        .build(ctx.vk, ctx.pool, shaders.at(3)));
    // pass 4: tempImages0 + both inputs -> image
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampleds(this->tempImages0)
        .sampled(additionalInput0)
        .sampled(additionalInput1)
        .storage(*this->image)
        .sampler(ctx.bnbSampler)
        .sampler(ctx.eabSampler)
        .buffer(ctx.constantBuffers.at(idx))
        .build(ctx.vk, ctx.pool, shaders.at(4)));

    // store dispatch extents (shared by all passes)
    this->dispatchExtent = backend::add_shift_extent(extent, 7, 3);
}
void Gamma1::prepare(std::vector<VkImage>& images) const {
for (size_t i = 0; i < this->tempImages0.size(); i++) {
images.push_back(this->tempImages0.at(i).handle());
images.push_back(this->tempImages1.at(i).handle());
}
images.push_back(this->image->handle());
}
/// Record every pass in creation order; all share one dispatch extent.
void Gamma1::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const {
    for (size_t pass = 0; pass < this->sets.size(); pass++)
        this->sets.at(pass).dispatch(vk, cmd, this->dispatchExtent);
}

View file

@ -1,53 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include "../helpers/managed_shader.hpp"
#include "../helpers/utils.hpp"
#include "lsfg-vk-common/helpers/pointers.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <vector>
#include <vulkan/vulkan_core.h>
namespace ctx { struct Ctx; }
namespace lsfgvk::backend {

    /// Gamma shaderchain (second stage): fixed four-pass chain over two
    /// ping-pong buffers, producing one RGBA16F image.
    class Gamma1 {
    public:
        /// Create a gamma shaderchain.
        /// @param ctx context (device, pool, shaders, samplers, buffers)
        /// @param idx generated frame index (selects the constant buffer)
        /// @param sourceImages source images sampled by the first pass
        /// @param additionalInput0 additional input image (final pass)
        /// @param additionalInput1 additional input image (final pass)
        Gamma1(const Ctx& ctx, size_t idx,
            const std::vector<vk::Image>& sourceImages,
            const vk::Image& additionalInput0,
            const vk::Image& additionalInput1);

        /// Prepare the shaderchain initially.
        /// @param images vector to fill with the handles of all owned images
        void prepare(std::vector<VkImage>& images) const;

        /// Render the gamma shaderchain (all four passes).
        /// @param vk the vulkan instance
        /// @param cmd command buffer
        void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd) const;

        /// Get the generated image.
        /// @return image
        [[nodiscard]] const auto& getImage() const { return *this->image; }

    private:
        std::vector<vk::Image> tempImages0; // ping-pong buffer A
        std::vector<vk::Image> tempImages1; // ping-pong buffer B
        ls::lazy<vk::Image> image;          // final output image
        std::vector<ManagedShader> sets;    // the four pass descriptor sets
        VkExtent2D dispatchExtent{};        // shared compute dispatch size
    };

}

View file

@ -1,57 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "generate.hpp"
#include "../helpers/utils.hpp"
#include "lsfg-vk-common/helpers/pointers.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <cstddef>
#include <utility>
#include <vector>
#include <vulkan/vulkan_core.h>
using namespace lsfgvk::backend;
/// Build the generate shaderchain: two descriptor sets over the same
/// inputs, differing only in the order of the source pair, so the render
/// call can alternate them by frame parity.
Generate::Generate(const Ctx& ctx, size_t idx,
        const std::pair<vk::Image, vk::Image>& sourceImages,
        const vk::Image& inputImage1,
        const vk::Image& inputImage2,
        const vk::Image& inputImage3,
        const vk::Image& outputImage) {
    // create descriptor sets, selecting the HDR variant when enabled
    const auto& shader = ctx.hdr ?
        ctx.shaders.get().generate_hdr : ctx.shaders.get().generate;
    this->sets.reserve(2);
    // set 0: samples the pair as (second, first)
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampled(sourceImages.second)
        .sampled(sourceImages.first)
        .sampled(inputImage1)
        .sampled(inputImage2)
        .sampled(inputImage3)
        .storage(outputImage)
        .sampler(ctx.bnbSampler)
        .sampler(ctx.eabSampler)
        .buffer(ctx.constantBuffers.at(idx))
        .build(ctx.vk, ctx.pool, shader));
    // set 1: samples the pair as (first, second)
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampled(sourceImages.first)
        .sampled(sourceImages.second)
        .sampled(inputImage1)
        .sampled(inputImage2)
        .sampled(inputImage3)
        .storage(outputImage)
        .sampler(ctx.bnbSampler)
        .sampler(ctx.eabSampler)
        .buffer(ctx.constantBuffers.at(idx))
        .build(ctx.vk, ctx.pool, shader));

    // store dispatch extent
    this->dispatchExtent = backend::add_shift_extent(ctx.sourceExtent, 15, 4);
}
/// Dispatch one of the two descriptor sets, alternating by frame parity.
void Generate::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const {
    const auto& set = this->sets.at(idx % 2);
    set.dispatch(vk, cmd, this->dispatchExtent);
}

View file

@ -1,45 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include "../helpers/managed_shader.hpp"
#include "../helpers/utils.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <cstddef>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace ctx { struct Ctx; }
namespace lsfgvk::backend {

    /// Generate shaderchain: two descriptor sets (swapped source order)
    /// writing the generated frame into a caller-provided output image.
    class Generate {
    public:
        /// Create a generate shaderchain.
        /// @param ctx context (device, pool, shaders, samplers, buffers)
        /// @param idx generated frame index (selects the constant buffer)
        /// @param sourceImages pair of source images
        /// @param inputImage1 input image 1
        /// @param inputImage2 input image 2
        /// @param inputImage3 input image 3
        /// @param outputImage image the generated frame is written to
        Generate(const Ctx& ctx, size_t idx,
            const std::pair<vk::Image, vk::Image>& sourceImages,
            const vk::Image& inputImage1,
            const vk::Image& inputImage2,
            const vk::Image& inputImage3,
            const vk::Image& outputImage);

        /// Render the generate shaderchain.
        /// @param vk the vulkan instance
        /// @param cmd command buffer
        /// @param idx frame index; parity selects which of the two sets runs
        void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const;

    private:
        std::vector<ManagedShader> sets; // two sets with swapped source order
        VkExtent2D dispatchExtent{};     // cached compute dispatch size
    };

}

View file

@ -1,53 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "mipmaps.hpp"
#include "../helpers/utils.hpp"
#include "lsfg-vk-common/helpers/pointers.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <cstddef>
#include <cstdint>
#include <utility>
#include <vector>
#include <vulkan/vulkan_core.h>
using namespace lsfgvk::backend;
/// Build the mipmaps shaderchain: seven R8 images (base plus six mips of
/// shrinking extent) and one descriptor set per source image, so the two
/// sources can be processed on alternating frames.
Mipmaps::Mipmaps(const Ctx& ctx,
        const std::pair<vk::Image, vk::Image>& sourceImages) {
    // create output images for base and 6 mips
    this->images.reserve(7);
    for (uint32_t i = 0; i < 7; i++)
        this->images.emplace_back(ctx.vk,
            backend::shift_extent(ctx.flowExtent, i), VK_FORMAT_R8_UNORM);

    // create descriptor sets for both input images
    this->sets.reserve(2);
    // set 0: first source image -> all mip outputs
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampled(sourceImages.first)
        .storages(this->images)
        .sampler(ctx.bnbSampler)
        .buffer(ctx.constantBuffer)
        .build(ctx.vk, ctx.pool, ctx.shaders.get().mipmaps));
    // set 1: second source image -> all mip outputs
    this->sets.emplace_back(ManagedShaderBuilder()
        .sampled(sourceImages.second)
        .storages(this->images)
        .sampler(ctx.bnbSampler)
        .buffer(ctx.constantBuffer)
        .build(ctx.vk, ctx.pool, ctx.shaders.get().mipmaps));

    // store dispatch extent
    this->dispatchExtent = backend::add_shift_extent(ctx.flowExtent, 63, 6);
}
/// Append the raw handle of each generated mipmap image.
void Mipmaps::prepare(std::vector<VkImage>& images) const {
    const size_t count = this->images.size();
    for (size_t level = 0; level < count; level++)
        images.push_back(this->images.at(level).handle());
}
/// Dispatch the per-source descriptor set selected by frame parity.
void Mipmaps::render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const {
    const auto& set = this->sets.at(idx % 2);
    set.dispatch(vk, cmd, this->dispatchExtent);
}

View file

@ -1,47 +0,0 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include "../helpers/managed_shader.hpp"
#include "../helpers/utils.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <cstddef>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace ctx { struct Ctx; }
namespace lsfgvk::backend {

    /// Mipmaps shaderchain: one compute pass producing a base image and
    /// its mip levels, with a descriptor set per source image.
    class Mipmaps {
    public:
        /// Create a mipmaps shaderchain.
        /// @param ctx context (device, pool, shaders, samplers, buffers)
        /// @param sourceImages pair of source images
        Mipmaps(const Ctx& ctx,
            const std::pair<vk::Image, vk::Image>& sourceImages);

        /// Prepare the shaderchain initially.
        /// @param images vector to fill with the handles of the mip images
        void prepare(std::vector<VkImage>& images) const;

        /// Render the mipmaps shaderchain.
        /// @param vk the vulkan instance
        /// @param cmd command buffer
        /// @param idx frame index; parity selects which source set runs
        void render(const vk::Vulkan& vk, const vk::CommandBuffer& cmd, size_t idx) const;

        /// Get the generated mipmap images.
        /// @return vector of images (base first, then shrinking mips)
        [[nodiscard]] const auto& getImages() const { return this->images; }

    private:
        std::vector<vk::Image> images;   // base image + 6 mip levels
        std::vector<ManagedShader> sets; // one descriptor set per source
        VkExtent2D dispatchExtent{};     // cached compute dispatch size
    };

}

View file

@ -0,0 +1,578 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "pipelines.hpp"
#include "modules/pipeline/signature.hpp"
#include "modules/pipeline/signature/helpers.hpp"
#include "modules/pipeline/signature/image.hpp"
#include "modules/pipeline/signature/pass.hpp"
#include <cstddef>
#include <cstdint>
#include <vector>
using namespace lsfgvk;
namespace {
using namespace lsfgvk::pipeline;
/// Build the pipeline signature
consteval PipelineSignature buildPipelineSignature(bool perf) {
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wshadow"
PipelineSignatureBuilder s;
const Resource INVALID{};
auto sourceImageArray = s.registerImage({
.format = Format::RGBA8888,
.hdrFormat = Format::RGBA16161616,
.flags = ImageFlag::Pinned |
ImageFlag::ExternalInput |
ImageFlag::HdrVariant,
.count = 2
});
/* Pre-pass */
auto mipmapImageArray = s.registerImage({
.format = Format::R8,
.flags = ImageFlag::Mipmaps,
.extentOp = { true },
.count = 7
});
s.appendPass({
.shader = "mipmaps",
.inputs{
sourceImageArray
},
.outputs{
mipmapImageArray
},
.dispatchOp = { 63, 6 }
});
std::vector<size_t> alphaArray(7);
std::vector<ExtentOp> alphaExtents(7);
for (uint32_t i = 0; i < 7; i++) {
const uint32_t mul = perf ? 1 : 2;
const ExtentOp dispatch = { 7, 3 };
ExtentOp extent = { 0, 6 - i };
extent += { 1, 1 };
auto flipflop0 = s.registerImage({
.format = Format::RGBA8888,
.extentOp = extent,
.count = 1 * mul
});
s.appendPass({
.shader = "alpha0",
.flags = PassFlag::Aggregate,
.inputs{
{ mipmapImageArray, 6 - i }
},
.outputs{
flipflop0
},
.dispatchOp = extent + dispatch
});
auto flipflop1 = s.registerImage({
.format = Format::RGBA8888,
.extentOp = extent,
.count = 1 * mul
});
s.appendPass({
.shader = "alpha1",
.flags = PassFlag::Aggregate,
.inputs{
flipflop0
},
.outputs{
flipflop1
},
.dispatchOp = extent + dispatch
});
extent += { 1, 1 };
auto flipflop2 = s.registerImage({
.format = Format::RGBA8888,
.extentOp = extent,
.count = 2 * mul
});
s.appendPass({
.shader = "alpha2",
.flags = PassFlag::Aggregate,
.inputs{
flipflop1
},
.outputs{
flipflop2
},
.dispatchOp = extent + dispatch
});
auto result = s.registerImage({
.format = Format::RGBA8888,
.flags = ImageFlag::Pinned,
.extentOp = extent,
.count = (2 * mul) * 3
});
s.appendPass({
.shader = "alpha3",
.flags = PassFlag::Aggregate,
.inputs{
flipflop2
},
.outputs{
result
},
.dispatchOp = extent + dispatch
});
alphaArray.at(6 - i) = result;
alphaExtents.at(6 - i) = extent;
}
ExtentOp extent = alphaExtents.at(0);
ExtentOp dispatch = { 7, 3 };
auto flipflop0 = s.registerImage({
.format = Format::RGBA8888,
.extentOp = extent,
.count = 2
});
s.appendPass({
.shader = "beta0",
.inputs{
alphaArray.at(0)
},
.outputs{
flipflop0
},
.dispatchOp = extent + dispatch
});
auto flipflop1 = s.registerImage({
.format = Format::RGBA8888,
.extentOp = extent,
.count = 2
});
s.appendPass({
.shader = "beta1",
.inputs{
flipflop0
},
.outputs{
flipflop1
},
.dispatchOp = extent + dispatch
});
auto flipflop2 = s.registerImage({
.format = Format::RGBA8888,
.extentOp = extent,
.count = 2
});
s.appendPass({
.shader = "beta2",
.inputs{
flipflop1
},
.outputs{
flipflop2
},
.dispatchOp = extent + dispatch
});
auto flipflop3 = s.registerImage({
.format = Format::RGBA8888,
.extentOp = extent,
.count = 2
});
s.appendPass({
.shader = "beta3",
.inputs{
flipflop2
},
.outputs{
flipflop3
},
.dispatchOp = extent + dispatch
});
auto betaImageArray = s.registerImage({
.format = Format::R8,
.flags = ImageFlag::Mipmaps,
.extentOp = extent,
.count = 6
});
dispatch = { 31, 5 };
s.appendPass({
.shader = "beta4",
.inputs{
flipflop3
},
.outputs{
betaImageArray
},
.dispatchOp = extent + dispatch
});
/* Main-pass */
s.split();
std::vector<size_t> gammaArray(7);
std::vector<size_t> deltaArray(3);
std::vector<size_t> epsilonArray(3);
for (uint32_t i = 0; i < 7; i++) {
const uint32_t mul = perf ? 1 : 2;
const ExtentOp dispatch = { 7, 3 };
const ExtentOp extent = alphaExtents.at(6 - i);
auto flipflop0 = s.registerImage({
.format = Format::RGBA8888,
.extentOp = extent,
.count = 3
});
s.appendPass({
.shader = "gamma0",
.flags = PassFlag::Aggregate
| (i == 0 ? PassFlag::Special : PassFlag::None),
.inputs{
alphaArray.at(6 - i),
i == 0 ? INVALID : gammaArray.at(i - 1)
},
.outputs{
flipflop0
},
.dispatchOp = extent + dispatch
});
auto flipflop1 = s.registerImage({
.format = Format::RGBA8888,
.extentOp = extent,
.count = 2 * mul
});
s.appendPass({
.shader = "gamma1",
.flags = PassFlag::Aggregate,
.inputs{
flipflop0
},
.outputs{
flipflop1
},
.dispatchOp = extent + dispatch
});
auto flipflop2 = s.registerImage({
.format = Format::RGBA8888,
.extentOp = extent,
.count = 2 * mul
});
s.appendPass({
.shader = "gamma2",
.flags = PassFlag::Aggregate,
.inputs{
flipflop1
},
.outputs{
flipflop2
},
.dispatchOp = extent + dispatch
});
auto flipflop3 = s.registerImage({
.format = Format::RGBA8888,
.extentOp = extent,
.count = 2 * mul
});
s.appendPass({
.shader = "gamma3",
.flags = PassFlag::Aggregate,
.inputs{
flipflop2
},
.outputs{
flipflop3
},
.dispatchOp = extent + dispatch
});
auto result = s.registerImage({
.format = Format::RGBA16161616,
.extentOp = extent
});
s.appendPass({
.shader = "gamma4",
.flags = PassFlag::Aggregate
| (i == 0 ? PassFlag::Special : PassFlag::None),
.inputs{
flipflop3,
i == 0 ? INVALID : gammaArray.at(i - 1),
{ betaImageArray, i == 0 ? 5 : (6 - i) }
},
.outputs{
result
},
.dispatchOp = extent + dispatch
});
gammaArray.at(i) = result;
if (i >= 4) {
auto flipflop0 = s.registerImage({
.format = Format::RGBA8888,
.extentOp = extent,
.count = 3
});
s.appendPass({
.shader = "delta0",
.flags = PassFlag::Aggregate
| (i == 4 ? PassFlag::Special : PassFlag::None),
.inputs{
alphaArray.at(6 - i),
i == 4 ? INVALID : deltaArray.at(i - 5)
},
.outputs{
flipflop0
},
.dispatchOp = extent + dispatch
});
auto flipflop1 = s.registerImage({
.format = Format::RGBA8888,
.extentOp = extent,
.count = 2 * mul
});
s.appendPass({
.shader = "delta1",
.flags = PassFlag::Aggregate,
.inputs{
flipflop0
},
.outputs{
flipflop1
},
.dispatchOp = extent + dispatch
});
auto flipflop2 = s.registerImage({
.format = Format::RGBA8888,
.extentOp = extent,
.count = 2 * mul
});
s.appendPass({
.shader = "delta2",
.flags = PassFlag::Aggregate,
.inputs{
flipflop1
},
.outputs{
flipflop2
},
.dispatchOp = extent + dispatch
});
auto flipflop3 = s.registerImage({
.format = Format::RGBA8888,
.extentOp = extent,
.count = 2 * mul
});
s.appendPass({
.shader = "delta3",
.flags = PassFlag::Aggregate,
.inputs{
flipflop2
},
.outputs{
flipflop3
},
.dispatchOp = extent + dispatch
});
auto result = s.registerImage({
.format = Format::RGBA16161616,
.extentOp = extent,
.count = 1
});
s.appendPass({
.shader = "delta4",
.flags = PassFlag::Aggregate
| (i == 4 ? PassFlag::Special : PassFlag::None),
.inputs{
flipflop3,
i == 4 ? INVALID : deltaArray.at(i - 5),
{ betaImageArray, 6 - i }
},
.outputs{
result
},
.dispatchOp = extent + dispatch
});
deltaArray.at(i - 4) = result;
}
if (i >= 4) {
auto flipflop0 = s.registerImage({
.format = Format::RGBA8888,
.extentOp = extent,
.count = mul
});
s.appendPass({
.shader = "epsilon0",
.flags = PassFlag::Aggregate
| (i == 4 ? PassFlag::Special : PassFlag::None),
.inputs{
alphaArray.at(6 - i),
gammaArray.at(i - 1),
i == 4 ? INVALID : deltaArray.at(i - 5)
},
.outputs{
flipflop0
},
.dispatchOp = extent + dispatch
});
auto flipflop1 = s.registerImage({
.format = Format::RGBA8888,
.extentOp = extent,
.count = mul
});
s.appendPass({
.shader = "epsilon1",
.flags = PassFlag::Aggregate,
.inputs{
flipflop0
},
.outputs{
flipflop1
},
.dispatchOp = extent + dispatch
});
auto flipflop2 = s.registerImage({
.format = Format::RGBA8888,
.extentOp = extent,
.count = mul
});
s.appendPass({
.shader = "epsilon2",
.flags = PassFlag::Aggregate,
.inputs{
flipflop1
},
.outputs{
flipflop2
},
.dispatchOp = extent + dispatch
});
auto flipflop3 = s.registerImage({
.format = Format::RGBA8888,
.extentOp = extent,
.count = mul
});
s.appendPass({
.shader = "epsilon3",
.flags = PassFlag::Aggregate,
.inputs{
flipflop2
},
.outputs{
flipflop3
},
.dispatchOp = extent + dispatch
});
auto result = s.registerImage({
.format = Format::RGBA16161616,
.extentOp = extent,
.count = 1
});
s.appendPass({
.shader = "epsilon4",
.flags = PassFlag::Aggregate
| (i == 4 ? PassFlag::Special : PassFlag::None),
.inputs{
flipflop3,
i == 4 ? INVALID : epsilonArray.at(i - 5)
},
.outputs{
result
},
.dispatchOp = extent + dispatch
});
epsilonArray.at(i - 4) = result;
}
}
extent = { false };
dispatch = { 15, 4 };
auto result = s.registerImage({
.format = Format::RGBA8888,
.hdrFormat = Format::RGBA16161616,
.flags = ImageFlag::Pinned
| ImageFlag::ExternalOutput
| ImageFlag::HdrVariant,
.extentOp = extent,
.count = 1
});
s.appendPass({
.shader = "generate",
.flags = PassFlag::HdrVariant,
.inputs{
sourceImageArray,
gammaArray.at(6),
deltaArray.at(2),
epsilonArray.at(2)
},
.outputs{
result
},
.dispatchOp = extent + dispatch
});
return s.finalize();
#pragma clang diagnostic pop
}
}
///
/// Get the pipeline signature for the requested mode.
///
/// Each variant is built lazily on its first request (function-local statics
/// are initialized in a thread-safe manner), instead of eagerly constructing
/// both variants on the first call regardless of which one is needed.
///
/// @param perf Performance mode
/// @return Pipeline signature
///
const PipelineSignature& lsfgvk::getPipelineSignature(bool perf) {
    if (perf) {
        static const PipelineSignature perfSignature = buildPipelineSignature(true);
        return perfSignature;
    }
    static const PipelineSignature signature = buildPipelineSignature(false);
    return signature;
}

View file

@ -0,0 +1,17 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#include "modules/pipeline/signature.hpp"
namespace lsfgvk {
///
/// Get the pipeline signature
///
/// @param perf Performance mode
/// @return Pipeline signature
///
const pipeline::PipelineSignature& getPipelineSignature(bool perf);
}

View file

@ -0,0 +1,656 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "vkhelper.hpp"
#include <algorithm>
#include <array>
#include <bitset>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <filesystem>
#include <fstream>
#include <iomanip>
#include <ios>
#include <iostream>
#include <optional>
#include <span>
#include <sstream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
/* Device initialization */
vk::UniqueInstance vkhelper::createInstance(vk::detail::DispatchLoaderDynamic& dld) {
    // Load the global (instance-less) entry points before anything else.
    dld.init();

    const vk::ApplicationInfo applicationInfo{
        .pApplicationName = "lsfg-vk",
        .applicationVersion = vk::makeVersion(2, 0, 0),
        .pEngineName = "lsfg-vk",
        .engineVersion = vk::makeVersion(2, 0, 0),
        // Vulkan 1.2 is fully supported by all Vulkan-capable GPUs.
        .apiVersion = vk::ApiVersion12
    };
    const vk::InstanceCreateInfo createInfo{
        .pApplicationInfo = &applicationInfo
    };

    auto instance = vk::createInstanceUnique(createInfo, nullptr, dld);
    // Re-initialize the loader with instance-level entry points.
    dld.init(*instance);
    return instance;
}
vk::PhysicalDevice vkhelper::findPhysicalDevice(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Instance& instance,
    const std::string& id
) {
    // Walk every enumerated device and return the first that matches `id` by
    // device name, "vvvv:dddd" hex vendor/device pair, or PCI bus address.
    // An empty `id` selects the first enumerated device.
    for (const auto& physdev : instance.enumeratePhysicalDevices(dld)) {
        // Check for VK_EXT_pci_bus_info (needed for the PCI-address match below)
        bool supportsPCIEXT{false};
        for (const auto& ext : physdev.enumerateDeviceExtensionProperties(nullptr, dld)) {
            if (std::string(ext.extensionName) != vk::EXTPciBusInfoExtensionName)
                continue;
            supportsPCIEXT = true;
            break;
        }
        // Fetch properties, chaining the PCI bus info only when available
        vk::PhysicalDevicePCIBusInfoPropertiesEXT busInfo{};
        vk::PhysicalDeviceProperties2 info{
            .pNext = supportsPCIEXT ? &busInfo : nullptr
        };
        physdev.getProperties2(&info, dld);
        auto& props{info.properties};
        // Check first if id is not given
        if (id.empty())
            return physdev;
        // Compare device name
        props.deviceName.back() = '\0'; // Ensure null-termination
        if (id == std::string(props.deviceName))
            return physdev;
        // Compare Vendor ID + Device ID, lowercase hex, e.g. "10de:2c02"
        std::ostringstream gpuss;
        gpuss << std::hex << std::setfill('0')
            << std::setw(4) << props.vendorID << ":"
            << std::setw(4) << props.deviceID;
        if (id == gpuss.str())
            return physdev;
        // Compare PCI bus ID, zero-padded, e.g. "0000:01:00.0"
        if (!supportsPCIEXT)
            continue;
        std::ostringstream pciss;
        pciss << std::hex << std::setfill('0')
            << std::setw(4) << busInfo.pciDomain << ":"
            << std::setw(2) << busInfo.pciBus << ":"
            << std::setw(2) << busInfo.pciDevice << "."
            << std::setw(1) << busInfo.pciFunction;
        if (id == pciss.str())
            return physdev;
    }
    throw std::runtime_error("No physical device matching '" + id + "' found");
}
uint32_t vkhelper::findComputeQueueFamilyIndex(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::PhysicalDevice& physdev
) {
    // Return the index of the first queue family advertising compute support.
    const auto families{physdev.getQueueFamilyProperties2(dld)};
    for (uint32_t index = 0; index < static_cast<uint32_t>(families.size()); index++) {
        const auto& props{families.at(index).queueFamilyProperties};
        if (props.queueFlags & vk::QueueFlagBits::eCompute)
            return index;
    }
    throw std::runtime_error("No compute-capable queue family found");
}
bool vkhelper::checkHalfPrecisionSupport(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::PhysicalDevice& physdev
) {
    // Chain the Vulkan 1.2 feature struct so the driver fills in shaderFloat16.
    vk::PhysicalDeviceVulkan12Features vk12Features{};
    vk::PhysicalDeviceFeatures2 features2{
        .pNext = &vk12Features
    };
    physdev.getFeatures2(&features2, dld);
    return static_cast<bool>(vk12Features.shaderFloat16);
}
std::pair<vk::UniqueDevice, vk::Queue> vkhelper::createDevice(
    vk::detail::DispatchLoaderDynamic& dld,
    const vk::PhysicalDevice& physdev,
    uint32_t qfi,
    bool fp16
) {
    // Device-level extensions required by lsfg-vk.
    constexpr std::array<const char*, 3> EXTENSIONS{
        vk::KHRSynchronization2ExtensionName,
        vk::KHRExternalMemoryFdExtensionName,
        vk::KHRExternalSemaphoreFdExtensionName
    };
    // Feature chain: synchronization2 + Vulkan 1.2 core features.
    vk::PhysicalDeviceSynchronization2FeaturesKHR sync2Info{
        .synchronization2 = VK_TRUE
    };
    const vk::PhysicalDeviceVulkan12Features vk12Info{
        .pNext = &sync2Info,
        .shaderFloat16 = fp16,
        .timelineSemaphore = VK_TRUE
    };
    // Single compute queue at the highest priority.
    const float queuePriority{1.0F};
    const vk::DeviceQueueCreateInfo queueInfo{
        .queueFamilyIndex = qfi,
        .queueCount = 1,
        .pQueuePriorities = &queuePriority
    };
    const vk::DeviceCreateInfo deviceInfo{
        .pNext = &vk12Info,
        .queueCreateInfoCount = 1,
        .pQueueCreateInfos = &queueInfo,
        .enabledExtensionCount = static_cast<uint32_t>(EXTENSIONS.size()),
        .ppEnabledExtensionNames = EXTENSIONS.data()
    };
    auto device{physdev.createDeviceUnique(deviceInfo, nullptr, dld)};
    dld.init(*device);
    // Fetch the queue BEFORE moving the device into the return value.
    // Braced-init-list elements are evaluated strictly left to right, so
    // calling device->getQueue(...) inside the list would dereference a
    // moved-from (null) UniqueDevice handle.
    const vk::Queue queue{device->getQueue(qfi, 0, dld)};
    return {
        std::move(device),
        queue
    };
}
/* Shader modules & pipelines */
vk::UniqueShaderModule vkhelper::createShaderModule(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const std::span<const uint32_t>& code
) {
    // codeSize is in bytes, while the span counts 32-bit words.
    const vk::ShaderModuleCreateInfo createInfo{
        .codeSize = code.size_bytes(),
        .pCode = code.data()
    };
    return device.createShaderModuleUnique(createInfo, nullptr, dld);
}
namespace {
    /// Compute the on-disk path of the pipeline cache file for a device.
    ///
    /// The cache directory is resolved per the XDG Base Directory spec:
    /// $XDG_CACHE_HOME/lsfg-vk first, then $HOME/.cache/lsfg-vk, finally
    /// /tmp/lsfg-vk. The file name embeds the tag and the device's pipeline
    /// cache UUID so caches from different drivers/devices don't collide.
    ///
    /// @param dld Dynamic dispatch loader
    /// @param physdev Physical device
    /// @param tag Cache tag for different pipelines
    /// @return Full path of the cache file (directory is created if missing)
    std::filesystem::path findPipelineCache(
        const vk::detail::DispatchLoaderDynamic& dld,
        const vk::PhysicalDevice& physdev,
        std::string_view tag
    ) {
        // Resolve the base directory. XDG_CACHE_HOME must take precedence
        // over $HOME/.cache — previously HOME was checked last and thus
        // unconditionally overrode XDG_CACHE_HOME on virtually every system.
        std::filesystem::path path{"/tmp/lsfg-vk"};
        const char* home{std::getenv("HOME")};
        if (home && *home != '\0')
            path = std::filesystem::path(home) / ".cache" / "lsfg-vk";
        const char* xdgCacheHome{std::getenv("XDG_CACHE_HOME")};
        if (xdgCacheHome && *xdgCacheHome != '\0')
            path = std::filesystem::path(xdgCacheHome) / "lsfg-vk";
        // Ensure the directory exists
        if (!std::filesystem::exists(path))
            std::filesystem::create_directories(path);
        // Format the pipeline cache UUID like a canonical UUID string
        vk::PhysicalDeviceProperties2 info{};
        physdev.getProperties2(&info, dld);
        std::ostringstream ss;
        ss << std::hex << std::setfill('0');
        for (uint32_t i = 0; i < 16; i++) {
            ss << std::setw(2) << static_cast<uint32_t>(info.properties.pipelineCacheUUID.at(i));
            if (i == 3 || i == 5 || i == 7 || i == 9) {
                ss << "-";
            }
        }
        // Return the full path
        return path / ("cache_" + std::string(tag) + "_" + ss.str() + ".bin");
    }
}
std::pair<vk::UniquePipelineCache, bool> vkhelper::createPipelineCache(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::PhysicalDevice& physdev,
    std::string_view tag
) {
    const auto path{findPipelineCache(dld, physdev, tag)};
    // A file at or below 32 bytes holds no usable data and is treated as invalid.
    const bool valid{std::filesystem::exists(path) && std::filesystem::file_size(path) > 32};

    // Load any previously persisted cache contents.
    std::vector<uint8_t> initialData;
    if (std::filesystem::exists(path)) {
        std::ifstream file(path, std::ios::binary | std::ios::ate);
        if (!file.is_open())
            throw std::runtime_error("Unable to open pipeline cache file for reading");
        const auto size{static_cast<std::streamsize>(file.tellg())};
        initialData.resize(static_cast<size_t>(size));
        file.seekg(0, std::ios::beg);
        if (!file.read(reinterpret_cast<char*>(initialData.data()), size)) // NOLINT (unsafe cast)
            throw std::runtime_error("Unable to read pipeline cache file");
    }

    // Seed the new pipeline cache with the loaded data (empty if none).
    const vk::PipelineCacheCreateInfo createInfo{
        .initialDataSize = initialData.size(),
        .pInitialData = initialData.data()
    };
    return { device.createPipelineCacheUnique(createInfo, nullptr, dld), valid };
}
void vkhelper::persistPipelineCache(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::PhysicalDevice& physdev,
    const vk::PipelineCache& cache,
    std::string_view tag
) {
    // Serialize the driver's cache blob and overwrite the on-disk file.
    const auto path{findPipelineCache(dld, physdev, tag)};
    std::ofstream file(path, std::ios::binary | std::ios::trunc);
    if (!file.is_open())
        throw std::runtime_error("Unable to open pipeline cache file for writing");

    const std::vector<uint8_t> blob{device.getPipelineCacheData(cache, dld)};
    file.write(
        reinterpret_cast<const char*>(blob.data()), // NOLINT (unsafe cast)
        static_cast<std::streamsize>(blob.size())
    );
    file.flush();
    file.close();
}
std::pair<vk::UniqueDescriptorSetLayout, vk::UniquePipelineLayout> vkhelper::createLayout(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
    size_t pushConstantSize
) {
    // Descriptor set layout — update-after-bind-pool so descriptor sets may
    // be updated after they have been bound.
    const vk::DescriptorSetLayoutCreateInfo setLayoutInfo{
        .flags = vk::DescriptorSetLayoutCreateFlagBits::eUpdateAfterBindPool,
        .bindingCount = static_cast<uint32_t>(bindings.size()),
        .pBindings = bindings.data()
    };
    auto setLayout{device.createDescriptorSetLayoutUnique(setLayoutInfo, nullptr, dld)};

    // A single push-constant range covering the compute stage.
    const vk::PushConstantRange range{
        .stageFlags = vk::ShaderStageFlagBits::eCompute,
        .size = static_cast<uint32_t>(pushConstantSize)
    };
    const vk::PipelineLayoutCreateInfo layoutInfo{
        .setLayoutCount = 1,
        .pSetLayouts = &*setLayout,
        .pushConstantRangeCount = 1,
        .pPushConstantRanges = &range
    };
    auto layout{device.createPipelineLayoutUnique(layoutInfo, nullptr, dld)};
    return { std::move(setLayout), std::move(layout) };
}
/* Resources */
vk::UniqueImage vkhelper::createImage(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    vk::Extent2D extent,
    vk::Format format,
    uint32_t layers,
    vk::ImageUsageFlags usage
) {
    // Plain 2D image (array of `layers`), single mip level, single sample.
    const vk::ImageCreateInfo createInfo{
        .imageType = vk::ImageType::e2D,
        .format = format,
        .extent = {
            .width = extent.width,
            .height = extent.height,
            .depth = 1
        },
        .mipLevels = 1,
        .arrayLayers = layers,
        .samples = vk::SampleCountFlagBits::e1,
        .usage = usage
    };
    return device.createImageUnique(createInfo, nullptr, dld);
}
vk::UniqueSampler vkhelper::createSampler(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    vk::SamplerAddressMode mode,
    vk::CompareOp compare,
    bool white
) {
    // Linear filtering across the board; border color chosen by `white`.
    const auto border{white
        ? vk::BorderColor::eFloatOpaqueWhite
        : vk::BorderColor::eFloatTransparentBlack};
    const vk::SamplerCreateInfo createInfo{
        .magFilter = vk::Filter::eLinear,
        .minFilter = vk::Filter::eLinear,
        .mipmapMode = vk::SamplerMipmapMode::eLinear,
        .addressModeU = mode,
        .addressModeV = mode,
        .addressModeW = mode,
        .compareOp = compare,
        .maxLod = vk::LodClampNone,
        .borderColor = border
    };
    return device.createSamplerUnique(createInfo, nullptr, dld);
}
std::pair<vk::UniqueBuffer, vk::UniqueDeviceMemory> vkhelper::createBuffer(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::PhysicalDevice& physdev,
    vk::BufferUsageFlags usage,
    const void* data,
    size_t size
) {
    // Create the buffer handle.
    const vk::BufferCreateInfo createInfo{
        .size = size,
        .usage = usage,
        .sharingMode = vk::SharingMode::eExclusive
    };
    auto buffer{device.createBufferUnique(createInfo, nullptr, dld)};

    // Back it with host-visible memory so the initial contents can be mapped in.
    const auto reqs{device.getBufferMemoryRequirements(*buffer, dld)};
    auto memory{vkhelper::allocateMemory(
        dld, device, physdev,
        reqs.size, reqs.memoryTypeBits,
        true
    )};
    device.bindBufferMemory(*buffer, *memory, 0, dld);

    // Upload the initial data, if provided.
    if (data != nullptr) {
        void* mapped{device.mapMemory(*memory, 0, size, {}, dld)};
        std::copy_n(
            static_cast<const uint8_t*>(data),
            size,
            static_cast<uint8_t*>(mapped)
        );
        device.unmapMemory(*memory, dld);
    }

    return {
        std::move(buffer),
        std::move(memory)
    };
}
/* Memory allocations */
vk::UniqueDeviceMemory vkhelper::allocateMemory(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::PhysicalDevice& physdev,
    vk::DeviceSize size,
    std::bitset<32> types,
    bool hostVisible
) {
    // Find a suitable memory type index. `types` is the memoryTypeBits mask
    // from the resource's memory requirements.
    const auto memProps{physdev.getMemoryProperties2(dld)};
    std::optional<uint32_t> selectedTypeIdx{};
    for (uint32_t i = 0; i < memProps.memoryProperties.memoryTypeCount; i++) {
        if (!types.test(i))
            continue;
        const auto& memType{memProps.memoryProperties.memoryTypes.at(i)};
        // "Host visible" here means mappable AND coherent (no manual flushes).
        const bool isHostVisible{
            memType.propertyFlags & vk::MemoryPropertyFlagBits::eHostVisible &&
            memType.propertyFlags & vk::MemoryPropertyFlagBits::eHostCoherent
        };
        if (hostVisible && !isHostVisible)
            continue;
        // Remember this candidate (later suitable types overwrite it) and
        // stop immediately once a device-local type is found — device-local
        // is preferred.
        selectedTypeIdx = i;
        if (memType.propertyFlags & vk::MemoryPropertyFlagBits::eDeviceLocal)
            break;
        // Fallback to host-visible memory if no device-local memory is available
    }
    if (!selectedTypeIdx)
        throw std::runtime_error("No suitable memory type found for allocation");
    // Allocate memory
    const vk::MemoryAllocateInfo allocInfo{
        .allocationSize = size,
        .memoryTypeIndex = *selectedTypeIdx
    };
    return device.allocateMemoryUnique(allocInfo, nullptr, dld);
}
/* Descriptors */
std::pair<vk::UniqueDescriptorPool, vk::DescriptorSet> vkhelper::createDescriptorSet(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::DescriptorSetLayout& layout,
    uint32_t samplers, uint32_t buffers,
    uint32_t sampledImages, uint32_t storageImages
) {
    // Pool sized for exactly one set with the requested descriptor counts.
    const std::array<vk::DescriptorPoolSize, 4> sizes{{
        { .type = vk::DescriptorType::eSampler,
          .descriptorCount = samplers },
        { .type = vk::DescriptorType::eSampledImage,
          .descriptorCount = sampledImages },
        { .type = vk::DescriptorType::eStorageImage,
          .descriptorCount = storageImages },
        { .type = vk::DescriptorType::eUniformBuffer,
          .descriptorCount = buffers }
    }};
    const vk::DescriptorPoolCreateInfo poolInfo{
        .flags = vk::DescriptorPoolCreateFlagBits::eUpdateAfterBind,
        .maxSets = 1,
        .poolSizeCount = static_cast<uint32_t>(sizes.size()),
        .pPoolSizes = sizes.data()
    };
    auto pool{device.createDescriptorPoolUnique(poolInfo, nullptr, dld)};

    // Allocate the single descriptor set out of the fresh pool.
    const vk::DescriptorSetAllocateInfo allocInfo{
        .descriptorPool = *pool,
        .descriptorSetCount = 1,
        .pSetLayouts = &layout
    };
    const auto sets{device.allocateDescriptorSets(allocInfo, dld)};
    return {
        std::move(pool),
        sets.front()
    };
}
vk::UniqueImageView vkhelper::createImageView(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::Image& image,
    vk::Format format,
    uint32_t layers
) {
    // Multi-layer images are exposed as 2D arrays, single layers as plain 2D.
    const auto viewType{layers == 1
        ? vk::ImageViewType::e2D
        : vk::ImageViewType::e2DArray};
    const vk::ImageViewCreateInfo createInfo{
        .image = image,
        .viewType = viewType,
        .format = format,
        .subresourceRange = {
            .aspectMask = vk::ImageAspectFlagBits::eColor,
            .levelCount = 1,
            .layerCount = layers
        }
    };
    return device.createImageViewUnique(createInfo, nullptr, dld);
}
/* Command buffers */
vk::UniqueCommandPool vkhelper::createCommandPool(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    uint32_t qfi
) {
    // Pool bound to the given queue family; no special create flags.
    const vk::CommandPoolCreateInfo createInfo{
        .queueFamilyIndex = qfi
    };
    return device.createCommandPoolUnique(createInfo, nullptr, dld);
}
vk::UniqueCommandBuffer vkhelper::createCommandBuffer(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::CommandPool& cmdpool
) {
    // Allocate a single primary command buffer from the pool.
    const vk::CommandBufferAllocateInfo allocInfo{
        .commandPool = cmdpool,
        .commandBufferCount = 1
    };
    auto buffers{device.allocateCommandBuffersUnique(allocInfo, dld)};
    return std::move(buffers.front());
}
vk::UniqueSemaphore vkhelper::createTimelineSemaphore(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    bool exportable
) {
    // Export info is only chained in when the caller wants to share the
    // semaphore as an opaque POSIX fd.
    const vk::ExportSemaphoreCreateInfo exportInfo{
        .handleTypes = vk::ExternalSemaphoreHandleTypeFlagBits::eOpaqueFd
    };
    const vk::SemaphoreTypeCreateInfo timelineInfo{
        .pNext = exportable ? &exportInfo : nullptr,
        .semaphoreType = vk::SemaphoreType::eTimeline,
    };
    const vk::SemaphoreCreateInfo createInfo{
        .pNext = &timelineInfo,
    };
    return device.createSemaphoreUnique(createInfo, nullptr, dld);
}
vk::UniqueFence vkhelper::createFence(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device
) {
    // Default fence: created unsignaled, no flags needed.
    const vk::FenceCreateInfo createInfo{};
    return device.createFenceUnique(createInfo, nullptr, dld);
}
/* External memory */
std::pair<vk::UniqueImage, vk::UniqueDeviceMemory> vkhelper::createExternalImage(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::PhysicalDevice& physdev,
    vk::Extent2D extent,
    vk::Format format,
    uint32_t layers,
    vk::ImageUsageFlags usage
) {
    // Create the image with opaque-fd external-memory support so its backing
    // allocation can later be exported via exportMemoryFd().
    const vk::ExternalMemoryImageCreateInfo externalInfo{
        .handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eOpaqueFd
    };
    const vk::ImageCreateInfo imageInfo{
        .pNext = &externalInfo,
        .imageType = vk::ImageType::e2D,
        .format = format,
        .extent = {
            .width = extent.width,
            .height = extent.height,
            .depth = 1
        },
        .mipLevels = 1,
        .arrayLayers = layers,
        .samples = vk::SampleCountFlagBits::e1,
        .usage = usage
    };
    auto image{device.createImageUnique(imageInfo, nullptr, dld)};
    // Find a suitable memory type index. Note: unlike allocateMemory(), this
    // requires device-local memory and has no host-visible fallback.
    const auto memProps{physdev.getMemoryProperties2(dld)};
    const auto requirements{device.getImageMemoryRequirements(*image, dld)};
    std::optional<uint32_t> selectedTypeIdx{};
    for (uint32_t i = 0; i < memProps.memoryProperties.memoryTypeCount; i++) {
        if (!std::bitset<32>(requirements.memoryTypeBits).test(i))
            continue;
        const auto& memType{memProps.memoryProperties.memoryTypes.at(i)};
        if (memType.propertyFlags & vk::MemoryPropertyFlagBits::eDeviceLocal) {
            selectedTypeIdx = i;
            break;
        }
    }
    if (!selectedTypeIdx)
        throw std::runtime_error("No suitable memory type found for allocation");
    // Allocate memory as a dedicated allocation (chained via pNext:
    // MemoryAllocateInfo -> ExportMemoryAllocateInfo -> MemoryDedicatedAllocateInfo)
    // and mark it exportable as an opaque fd.
    const vk::MemoryDedicatedAllocateInfo dedicatedInfo{
        .image = *image,
    };
    const vk::ExportMemoryAllocateInfo exportInfo{
        .pNext = &dedicatedInfo,
        .handleTypes = vk::ExternalMemoryHandleTypeFlagBits::eOpaqueFd
    };
    const vk::MemoryAllocateInfo allocInfo{
        .pNext = &exportInfo,
        .allocationSize = requirements.size,
        .memoryTypeIndex = *selectedTypeIdx
    };
    auto memory{device.allocateMemoryUnique(allocInfo, nullptr, dld)};
    // Bind memory
    device.bindImageMemory(*image, *memory, 0, dld);
    return{
        std::move(image),
        std::move(memory)
    };
}
int vkhelper::exportMemoryFd(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::DeviceMemory& memory
) {
    // Export the allocation as an opaque POSIX fd; per the Vulkan spec the
    // caller takes ownership of the returned descriptor.
    const vk::MemoryGetFdInfoKHR getInfo{
        .memory = memory,
        .handleType = vk::ExternalMemoryHandleTypeFlagBits::eOpaqueFd
    };
    return device.getMemoryFdKHR(getInfo, dld);
}
int vkhelper::exportSemaphoreFd(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::Semaphore& semaphore
) {
    // Export the semaphore as an opaque POSIX fd; per the Vulkan spec the
    // caller takes ownership of the returned descriptor.
    const vk::SemaphoreGetFdInfoKHR getInfo{
        .semaphore = semaphore,
        .handleType = vk::ExternalSemaphoreHandleTypeFlagBits::eOpaqueFd
    };
    return device.getSemaphoreFdKHR(getInfo, dld);
}

View file

@ -0,0 +1,449 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#pragma once
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
#define VULKAN_HPP_TYPESAFE_CONVERSION 0
#define VULKAN_HPP_NO_DEFAULT_DISPATCHER 1
#define VULKAN_HPP_NO_CONSTRUCTORS 1
#define VULKAN_HPP_NO_SETTERS 1
#define VULKAN_HPP_NO_SPACESHIP_OPERATOR 1
#define VULKAN_HPP_NO_TO_STRING 1
#include <vulkan/vulkan.hpp> // IWYU pragma: export
// IWYU pragma: begin_exports
#include <vulkan/vulkan_core.h>
#include <vulkan/vulkan_enums.hpp>
#include <vulkan/vulkan_funcs.hpp>
#include <vulkan/vulkan_handles.hpp>
#include <vulkan/vulkan_hpp_macros.hpp>
#include <vulkan/vulkan_structs.hpp>
// IWYU pragma: end_exports
#include <bitset>
#include <cstddef>
#include <cstdint>
#include <span>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
namespace vkhelper {
/* Device initialization */
///
/// Create a Vulkan 1.2 instance for lsfg-vk
///
/// @param dld Dynamic dispatch loader
/// @return RAII-wrapped Vulkan instance
/// @throws std::runtime_error on failure
///
vk::UniqueInstance createInstance(vk::detail::DispatchLoaderDynamic& dld);
///
/// Find a physical device through a custom identifier
///
/// The custom identifier may be one of:
/// - Device name (e.g. "NVIDIA GeForce RTX 5080")
/// - Vendor ID + Device ID in lowercase hexadecimal (e.g. "10de:2c02")
/// - PCI bus ID with padded zeroes (e.g. "0000:01:00.0")
///
/// @param dld Dynamic dispatch loader
/// @param instance Vulkan instance
/// @param id Custom identifier
/// @return Selected physical device
/// @throws std::runtime_error if no suitable device found
///
vk::PhysicalDevice findPhysicalDevice(
const vk::detail::DispatchLoaderDynamic& dld,
const vk::Instance& instance,
const std::string& id
);
///
/// Find the first compute-capable queue family index
///
/// @param dld Dynamic dispatch loader
/// @param physdev Physical device
/// @return Queue family index
/// @throws std::runtime_error if no compute-capable queue found
///
uint32_t findComputeQueueFamilyIndex(
const vk::detail::DispatchLoaderDynamic& dld,
const vk::PhysicalDevice& physdev
);
///
/// Check a physical device for half-precision float support
///
/// @param dld Dynamic dispatch loader
/// @param physdev Physical device
/// @return Whether half-precision float is supported
///
bool checkHalfPrecisionSupport(
const vk::detail::DispatchLoaderDynamic& dld,
const vk::PhysicalDevice& physdev
);
///
/// Create a Vulkan device for lsfg-vk
///
/// This device will have the core features timelineSemaphore and shaderFloat16 (if requested)
/// enabled, as well as the synchronization2, external memory & semaphore fd extensions.
///
/// @param dld Dynamic dispatch loader
/// @param physdev Physical device
/// @param qfi Queue family index of compute-capable queue
/// @param fp16 Whether to enable half-precision float support
/// @return RAII-wrapped Vulkan device & compute queue
/// @throws std::runtime_error on failure
///
std::pair<vk::UniqueDevice, vk::Queue> createDevice(
vk::detail::DispatchLoaderDynamic& dld,
const vk::PhysicalDevice& physdev,
uint32_t qfi,
bool fp16
);
/* Shader modules & pipelines */
///
/// Create a Vulkan shader module from SPIR-V bytecode
///
/// @param dld Dynamic dispatch loader
/// @param device Vulkan device
/// @param code SPIR-V bytecode
/// @return RAII-wrapped Vulkan shader module
/// @throws std::runtime_error on failure
///
vk::UniqueShaderModule createShaderModule(
const vk::detail::DispatchLoaderDynamic& dld,
const vk::Device& device,
const std::span<const uint32_t>& code
);
///
/// Create and maintain the Vulkan pipeline cache for lsfg-vk
///
/// @param dld Dynamic dispatch loader
/// @param device Vulkan device
/// @param physdev Physical device
/// @param tag Cache tag for different pipelines
/// @return RAII-wrapped Vulkan pipeline cache
/// @throws std::runtime_error on failure
///
std::pair<vk::UniquePipelineCache, bool> createPipelineCache(
const vk::detail::DispatchLoaderDynamic& dld,
const vk::Device& device,
const vk::PhysicalDevice& physdev,
std::string_view tag
);
///
/// Persist the Vulkan pipeline cache to disk
///
/// @param dld Dynamic dispatch loader
/// @param device Vulkan device
/// @param physdev Physical device
/// @param cache Vulkan pipeline cache
/// @param tag Cache tag for different pipelines
/// @throws std::runtime_error on failure
///
void persistPipelineCache(
const vk::detail::DispatchLoaderDynamic& dld,
const vk::Device& device,
const vk::PhysicalDevice& physdev,
const vk::PipelineCache& cache,
std::string_view tag
);
///
/// Create a Vulkan descriptor set layout
///
/// @param dld Dynamic dispatch loader
/// @param device Vulkan device
/// @param bindings List of descriptor set layout bindings
/// @param pushConstantSize Size of push constant range
/// @return RAII-wrapped Vulkan descriptor set & pipeline layout
/// @throws std::runtime_error on failure
///
std::pair<vk::UniqueDescriptorSetLayout, vk::UniquePipelineLayout> createLayout(
const vk::detail::DispatchLoaderDynamic& dld,
const vk::Device& device,
const std::vector<vk::DescriptorSetLayoutBinding>& bindings,
size_t pushConstantSize
);
/* Resources */
///
/// Create a (unallocated) Vulkan image for lsfg-vk
///
/// @param dld Dynamic dispatch loader
/// @param device Vulkan device
/// @param extent Image extent
/// @param format Image format
/// @param layers Amount of images
/// @param usage Image usage flags
/// @return RAII-wrapped Vulkan image
/// @throws std::runtime_error on failure
///
vk::UniqueImage createImage(
const vk::detail::DispatchLoaderDynamic& dld,
const vk::Device& device,
vk::Extent2D extent,
vk::Format format,
uint32_t layers,
vk::ImageUsageFlags usage
);
///
/// Create a Vulkan sampler for lsfg-vk
///
/// @param dld Dynamic dispatch loader
/// @param device Vulkan device
/// @param mode Address mode
/// @param compare Comparison mode
/// @param white Black/White border color
/// @return RAII-wrapped Vulkan sampler
/// @throws std::runtime_error on failure
///
vk::UniqueSampler createSampler(
const vk::detail::DispatchLoaderDynamic& dld,
const vk::Device& device,
vk::SamplerAddressMode mode,
vk::CompareOp compare,
bool white
);
// Raw (untyped) overload of createBuffer — documented on the templated
// wrapper below, which forwards to this declaration.
std::pair<vk::UniqueBuffer, vk::UniqueDeviceMemory> createBuffer(
const vk::detail::DispatchLoaderDynamic& dld,
const vk::Device& device,
const vk::PhysicalDevice& physdev,
vk::BufferUsageFlags usage,
const void* data,
size_t size
);
///
/// Create a Vulkan buffer for lsfg-vk
///
/// Typed convenience wrapper around the raw overload above: creates a
/// host-visible uniform buffer sized for `T` and uploads `data` into it.
///
/// @param dld Dynamic dispatch loader
/// @param device Vulkan device
/// @param physdev Physical device
/// @param data Object whose bytes are copied into the buffer
/// @return RAII-wrapped Vulkan uniform buffer & device memory
/// @throws std::runtime_error on failure
///
template<typename T>
std::pair<vk::UniqueBuffer, vk::UniqueDeviceMemory> createBuffer(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::PhysicalDevice& physdev,
    const T& data
) {
    return createBuffer(
        dld,
        device,
        physdev,
        vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eTransferDst,
        static_cast<const void*>(&data),
        sizeof(T)
    );
}
/* Memory allocations */
///
/// Create a Vulkan memory allocation
///
/// @param dld Dynamic dispatch loader
/// @param device Vulkan device
/// @param physdev Physical device
/// @param size Allocation size
/// @param types Valid memory type bits
/// @param hostVisible Require host visible memory
/// @return RAII-wrapped Vulkan device memory
/// @throws std::runtime_error on failure
///
vk::UniqueDeviceMemory allocateMemory(
const vk::detail::DispatchLoaderDynamic& dld,
const vk::Device& device,
const vk::PhysicalDevice& physdev,
vk::DeviceSize size,
std::bitset<32> types,
bool hostVisible = false
);
///
/// Align a memory allocation
///
/// @param size Memory size
/// @param align Alignment — must be a power of two; the bitmask rounding
///              below is only correct for power-of-two alignments (which is
///              what Vulkan memory requirements provide)
/// @return `size` rounded up to the next multiple of `align`
///
inline vk::DeviceSize align(vk::DeviceSize size, vk::DeviceSize align) noexcept {
    return (size + align - 1) & ~(align - 1);
}
/* Descriptors */
///
/// Create a Vulkan descriptor set for lsfg-vk
///
/// @param dld Dynamic dispatch loader
/// @param device Vulkan device
/// @param layout Descriptor set layout
/// @param samplers Number of sampler descriptors to reserve in the pool
/// @param buffers Number of buffer descriptors to reserve in the pool
/// @param sampledImages Number of sampled image descriptors to reserve in the pool
/// @param storageImages Number of storage image descriptors to reserve in the pool
/// @return Vulkan descriptor pool & set
///         (the set is not RAII-wrapped; its lifetime is tied to the returned pool)
/// @throws std::runtime_error on failure
///
std::pair<vk::UniqueDescriptorPool, vk::DescriptorSet> createDescriptorSet(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::DescriptorSetLayout& layout,
    uint32_t samplers, uint32_t buffers,
    uint32_t sampledImages, uint32_t storageImages
);
///
/// Create an image view
///
/// @param dld Dynamic dispatch loader
/// @param device Vulkan device
/// @param image Vulkan image
/// @param format Image format
/// @param layers Number of array layers covered by the view
/// @return RAII-wrapped Vulkan image view
/// @throws std::runtime_error on failure
///
vk::UniqueImageView createImageView(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::Image& image,
    vk::Format format,
    uint32_t layers
);
/* Command buffers */
///
/// Create a Vulkan command pool for lsfg-vk
///
/// @param dld Dynamic dispatch loader
/// @param device Vulkan device
/// @param qfi Queue family index the pool's command buffers are allocated for
/// @return RAII-wrapped Vulkan command pool
/// @throws std::runtime_error on failure
///
vk::UniqueCommandPool createCommandPool(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    uint32_t qfi
);
///
/// Create a Vulkan command buffer for lsfg-vk
///
/// @param dld Dynamic dispatch loader
/// @param device Vulkan device
/// @param cmdpool Vulkan command pool to allocate from
/// @return RAII-wrapped Vulkan command buffer
/// @throws std::runtime_error on failure
///
vk::UniqueCommandBuffer createCommandBuffer(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::CommandPool& cmdpool
);
///
/// Create a timeline semaphore
///
/// @param dld Dynamic dispatch loader
/// @param device Vulkan device
/// @param exportable Whether the semaphore should be exportable as a fd
///                   (see exportSemaphoreFd below; defaults to false)
/// @return RAII-wrapped Vulkan semaphore
/// @throws std::runtime_error on failure
///
vk::UniqueSemaphore createTimelineSemaphore(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    bool exportable = false
);
///
/// Create a fence
///
/// @param dld Dynamic dispatch loader
/// @param device Vulkan device
/// @return RAII-wrapped Vulkan fence
/// @throws std::runtime_error on failure
///
vk::UniqueFence createFence(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device
);
/* External memory */
///
/// Create a Vulkan image with a fd-exportable dedicated allocation
///
/// @param dld Dynamic dispatch loader
/// @param device Vulkan device
/// @param physdev Physical device
/// @param extent Image extent
/// @param format Image format
/// @param layers Number of array layers in the image
/// @return RAII-wrapped Vulkan image & its dedicated device memory
/// @throws std::runtime_error on failure
///
std::pair<vk::UniqueImage, vk::UniqueDeviceMemory> createExternalImage(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::PhysicalDevice& physdev,
    vk::Extent2D extent,
    vk::Format format,
    uint32_t layers,
    vk::ImageUsageFlags usage
);
///
/// Export a Vulkan memory allocation as a fd
///
/// @param dld Dynamic dispatch loader
/// @param device Vulkan device
/// @param memory Vulkan device memory
/// @return File descriptor to the allocation
///         (presumably owned by the caller, who must close it — confirm at call sites)
/// @throws std::runtime_error on failure
///
int exportMemoryFd(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::DeviceMemory& memory
);
///
/// Export a Vulkan semaphore as a fd
///
/// @param dld Dynamic dispatch loader
/// @param device Vulkan device
/// @param semaphore Vulkan semaphore (must have been created exportable)
/// @return File descriptor to the semaphore
///         (presumably owned by the caller, who must close it — confirm at call sites)
/// @throws std::runtime_error on failure
///
int exportSemaphoreFd(
    const vk::detail::DispatchLoaderDynamic& dld,
    const vk::Device& device,
    const vk::Semaphore& semaphore
);
}

View file

@ -6,12 +6,14 @@ set(CLI_SOURCES
add_executable(lsfg-vk-cli ${CLI_SOURCES})
target_include_directories(lsfg-vk-cli SYSTEM
PRIVATE thirdparty/include)
target_link_libraries(lsfg-vk-cli
PUBLIC lsfg-vk-common
PUBLIC lsfg-vk-backend)
target_compile_options(lsfg-vk-cli PRIVATE
-Wno-unknown-warning-option
-Wno-unsafe-buffer-usage) # CLI parsing
install(TARGETS lsfg-vk-cli

View file

@ -18,7 +18,7 @@
using namespace lsfgvk::cli;
namespace {
/// print usage information
/// Print usage information
void usage(const std::string& prog) {
std::cerr <<
R"(Validate, benchmark, and debug lsfg-vk.
@ -37,7 +37,7 @@ SUBCOMMAND OPTIONS:
-c, --config <PATH> Optional path to the configuration file
benchmark & debug
-d, --dll <PATH> Path to Lossless.dll
-d, --dll <PATH> Path to lsfg-vk.dll
-a, --allow-fp16 Allow FP16 acceleration
-w, --width <INT> Width of the input frames
-h, --height <INT> Height of the input frames
@ -53,7 +53,7 @@ SUBCOMMAND OPTIONS:
<folder> Path to the debug frames)" << '\n';
}
/// parse the validate command options
/// Parse the validate command options
[[noreturn]] void on_validate(int argc, char** argv) {
validate::Options opts{};
@ -83,7 +83,7 @@ SUBCOMMAND OPTIONS:
std::exit(validate::run(opts));
}
/// parse the benchmark command options
/// Parse the benchmark command options
[[noreturn]] void on_benchmark(int argc, char** argv) {
benchmark::Options opts{};
@ -145,7 +145,7 @@ SUBCOMMAND OPTIONS:
std::exit(benchmark::run(opts));
}
/// parse the debug command options
/// Parse the debug command options
[[noreturn]] void on_debug(int argc, char** argv) {
debug::Options opts{};

View file

@ -1,12 +1,12 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "benchmark.hpp"
#include "lsfg-vk-backend/lsfgvk.hpp"
#include "lsfg-vk-common/helpers/errors.hpp"
#include "lsfg-vk-common/helpers/paths.hpp"
#include "lsfg-vk-common/vulkan/image.hpp"
#include "lsfg-vk-common/vulkan/timeline_semaphore.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include "lsfg-vk/lsfgvk.hpp"
#include <array>
#include <cstddef>
@ -18,7 +18,6 @@
#include <iostream>
#include <optional>
#include <string>
#include <utility>
#include <vector>
#include <time.h>
@ -29,7 +28,7 @@ using namespace lsfgvk::cli;
using namespace lsfgvk::cli::benchmark;
namespace {
// get current time in milliseconds
// Get current time in milliseconds
uint64_t ms() {
struct timespec ts{};
clock_gettime(CLOCK_MONOTONIC, &ts);
@ -41,29 +40,28 @@ namespace {
int benchmark::run(const Options& opts) {
try {
// parse options
// Parse options
if (opts.flow < 0.25F || opts.flow > 1.0F)
throw ls::error("flow scale must be between 0.25 and 1.0");
throw ls::error("Flow scale must be between 0.25 and 1.0");
if (opts.multiplier < 2)
throw ls::error("multiplier must be 2 or greater");
throw ls::error("Multiplier must be 2 or greater");
if (opts.width <= 0 || opts.height <= 0)
throw ls::error("width and height must be positive integers");
throw ls::error("Width and height must be positive integers");
if (opts.duration <= 0)
throw ls::error("duration must be a positive integer");
throw ls::error("Duration must be a positive integer");
const VkExtent2D extent{
static_cast<uint32_t>(opts.width),
static_cast<uint32_t>(opts.height)
};
// create instance
// Create instance
std::string gpu_name{};
const vk::Vulkan vk{
"lsfg-vk-debug", vk::version{2, 0, 0},
"lsfg-vk-debug-engine", vk::version{2, 0, 0},
[opts](const vk::VulkanInstanceFuncs fi,
"lsfg-vk-debug", vk::version{2, 0, 0},
[opts, gpu_name = &gpu_name](const vk::VulkanInstanceFuncs fi,
const std::vector<VkPhysicalDevice>& devices) {
if (!opts.gpu.has_value())
return devices.front();
for (const VkPhysicalDevice& device : devices) {
VkPhysicalDeviceProperties2 props{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2
@ -72,84 +70,81 @@ int benchmark::run(const Options& opts) {
auto& properties = props.properties;
std::array<char, 256> devname = std::to_array(properties.deviceName);
devname.at(255) = '\0'; // ensure null-termination
devname.at(255) = '\0'; // Ensure null-termination
if (std::string(devname.data()) == *opts.gpu)
if (!opts.gpu || std::string(devname.data()) == *opts.gpu) {
*gpu_name = std::string(devname.data());
return device;
}
}
throw ls::error("failed to find specified GPU: " + *opts.gpu);
throw ls::error("Failed to find specified GPU: " + *opts.gpu);
}
};
std::pair<int, int> srcfds{};
const vk::Image frame_0{vk,
extent, VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
std::nullopt, &srcfds.first};
const vk::Image frame_1{vk,
extent, VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
std::nullopt, &srcfds.second};
std::vector<vk::Image> destimgs{};
std::vector<int> destfds{};
for (int i = 0; i < (opts.multiplier - 1); i++) {
int fd{};
destimgs.emplace_back(vk,
extent, VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
std::nullopt,
&fd
);
destfds.push_back(fd);
}
int syncfd{};
const vk::TimelineSemaphore sync{vk, 0, std::nullopt, &syncfd};
// initialize backend
// Initialize backend
std::string dll{};
if (opts.dll.has_value())
dll = *opts.dll;
else
dll = ls::findShaderDll();
lsfgvk::backend::Instance lsfgvk{
[opts](
const std::string& gpu_name,
std::pair<const std::string&, const std::string&>,
const std::optional<std::string>&
) {
return opts.gpu.value_or(gpu_name) == gpu_name;
},
dll, opts.allow_fp16
const lsfgvk::Instance lsfgvk{
gpu_name,
dll,
opts.allow_fp16
};
lsfgvk::Context lsfgvk_ctx{
lsfgvk,
extent.width, extent.height,
opts.flow, opts.performance_mode
};
lsfgvk::backend::Context& lsfgvk_ctx = lsfgvk.openContext(
srcfds, destfds,
syncfd, extent.width, extent.height,
false, 1.0F / opts.flow, opts.performance_mode
);
// run the benchmark
// Import resources
const auto fds{lsfgvk_ctx.exportFds()};
const vk::Image source{vk,
extent,
VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
fds.sourceFd, std::nullopt, 2
};
const vk::Image destination{vk,
extent,
VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
fds.destinationFd
};
const vk::TimelineSemaphore sync{vk,
0,
fds.syncFd
};
// Run the benchmark
const uint32_t total{static_cast<uint32_t>(opts.multiplier) - 1U};
size_t iterations{0};
size_t generated_frames{0};
size_t total_frames{1};
size_t total_frames{0};
size_t idx{1};
uint64_t print_time = ms() + 1000ULL;
const uint64_t end_time = ms() + static_cast<uint64_t>(opts.duration) * 1000ULL;
while (ms() < end_time) {
sync.signal(vk, total_frames++);
lsfgvk.scheduleFrames(lsfgvk_ctx);
lsfgvk_ctx.dispatch(total);
for (size_t i = 0; i < destimgs.size(); i++) {
auto success = sync.wait(vk, total_frames++);
for (size_t i = 0; i < total; i++) {
sync.signal(vk, idx++);
auto success = sync.wait(vk, idx++);
if (!success)
throw ls::error("failed to wait for frame");
throw ls::error("Failed to wait for frame");
total_frames++;
generated_frames++;
}
total_frames++;
iterations++;
if (ms() >= print_time) {
@ -158,25 +153,25 @@ int benchmark::run(const Options& opts) {
}
}
// output results
// Output results
std::cerr << (opts.duration < 40 ? "\r" : "\n");
std::cerr << "benchmark results (ran for " << opts.duration << " seconds):\n";
std::cerr << " iterations: " << iterations << "\n";
std::cerr << " generated frames: " << generated_frames << "\n";
std::cerr << " total frames: " << total_frames << "\n";
std::cerr << "Benchmark results (ran for " << opts.duration << " seconds):\n";
std::cerr << " Iterations: " << iterations << "\n";
std::cerr << " Generated frames: " << generated_frames << "\n";
std::cerr << " Total frames: " << total_frames << "\n";
const auto time = static_cast<double>(opts.duration);
const double fps_generated = static_cast<double>(generated_frames) / time;
const double fps_total = static_cast<double>(total_frames) / time;
std::cerr << std::setprecision(2) << std::fixed;
std::cerr << " fps (generated): " << fps_generated << "fps\n";
std::cerr << " fps (total): " << fps_total << "fps\n";
std::cerr << " FPS (generated): " << fps_generated << "fps\n";
std::cerr << " FPS (total): " << fps_total << "fps\n";
// Wait for idle
lsfgvk_ctx.idle();
// deinitialize lsfg-vk
lsfgvk.closeContext(lsfgvk_ctx);
return EXIT_SUCCESS;
} catch (const std::exception& e) {
std::cerr << "error: " << e.what() << "\n";
std::cerr << "Error: " << e.what() << "\n";
return EXIT_FAILURE;
}
}

View file

@ -7,7 +7,9 @@
namespace lsfgvk::cli::benchmark {
/// options for the "benchmark" command
///
/// Options for the "benchmark" command
///
struct Options {
std::optional<std::string> dll;
bool allow_fp16{false};
@ -22,8 +24,12 @@ namespace lsfgvk::cli::benchmark {
int duration{10};
};
/// run the "benchmark" command
/// @param opts the command options
///
/// Run the "benchmark" command
///
/// @param opts Command options
/// @return Exit code
///
int run(const Options& opts);
}

View file

@ -1,7 +1,6 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "debug.hpp"
#include "lsfg-vk-backend/lsfgvk.hpp"
#include "lsfg-vk-common/helpers/errors.hpp"
#include "lsfg-vk-common/helpers/paths.hpp"
#include "lsfg-vk-common/vulkan/buffer.hpp"
@ -10,6 +9,9 @@
#include "lsfg-vk-common/vulkan/timeline_semaphore.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#define LSFGVK_PRIV
#include "lsfg-vk/lsfgvk.hpp"
#include <algorithm>
#include <array>
#include <cstddef>
@ -22,25 +24,30 @@
#include <iostream>
#include <optional>
#include <string>
#include <utility>
#include <thread>
#include <vector>
#include <dlfcn.h>
#include <renderdoc_app.h>
#include <vulkan/vulkan_core.h>
using namespace lsfgvk::cli;
using namespace lsfgvk::cli::debug;
namespace {
/// uploads an image from a dds file
void upload_image(const vk::Vulkan& vk,
const vk::Image& image, const std::string& path) {
// read image bytecode
/// Upload an image from a DDS file
void uploadDDS(const vk::Vulkan& vk,
const vk::Image& image,
const std::string& path,
uint32_t layer
) {
// Read image data
std::ifstream file(path.data(), std::ios::binary | std::ios::ate);
if (!file.is_open())
throw ls::error("ifstream::ifstream() failed");
std::streamsize size = static_cast<std::streamsize>(file.tellg());
size -= 124 + 4; // dds header and magic bytes
size -= 124 + 4; // DDS header and magic bytes
std::vector<char> code(static_cast<size_t>(size));
file.seekg(124 + 4, std::ios::beg);
@ -49,13 +56,13 @@ namespace {
file.close();
// upload to image
// Upload to image
const vk::Buffer stagingbuf{vk, code.data(), code.size(),
VK_BUFFER_USAGE_TRANSFER_SRC_BIT};
const vk::CommandBuffer cmdbuf{vk};
cmdbuf.begin(vk);
cmdbuf.copyBufferToImage(vk, stagingbuf, image);
cmdbuf.copyBufferToImage(vk, stagingbuf, image, layer);
cmdbuf.end(vk);
const vk::TimelineSemaphore sema{vk, 0};
@ -65,19 +72,19 @@ namespace {
int debug::run(const Options& opts) {
try {
// parse options
// Parse options
if (opts.flow < 0.25F || opts.flow > 1.0F)
throw ls::error("flow scale must be between 0.25 and 1.0");
throw ls::error("Flow scale must be between 0.25 and 1.0");
if (opts.multiplier < 2)
throw ls::error("multiplier must be 2 or greater");
throw ls::error("Multiplier must be 2 or greater");
if (opts.width <= 0 || opts.height <= 0)
throw ls::error("width and height must be positive integers");
throw ls::error("Width and height must be positive integers");
const VkExtent2D extent{
static_cast<uint32_t>(opts.width),
static_cast<uint32_t>(opts.height)
};
if (!std::filesystem::exists(opts.path))
throw ls::error("debug path does not exist: " + opts.path.string());
throw ls::error("Debug path does not exist: " + opts.path.string());
std::vector<std::filesystem::path> paths{};
for (const auto& entry : std::filesystem::directory_iterator(opts.path))
paths.push_back(entry.path());
@ -87,23 +94,22 @@ int debug::run(const Options& opts) {
auto norm_a = fa.find_first_of('.');
if (norm_a == std::string::npos)
throw ls::error("invalid debug file name: " + fa);
throw ls::error("Invalid debug file name: " + fa);
auto norm_b = fb.find_first_of('.');
if (norm_b == std::string::npos)
throw ls::error("invalid debug file name: " + fb);
throw ls::error("Invalid debug file name: " + fb);
return std::stoi(fa.substr(0, norm_a)) < std::stoi(fb.substr(0, norm_b));
});
// create instance
// Create instance
std::string gpu_name{};
const vk::Vulkan vk{
"lsfg-vk-debug", vk::version{2, 0, 0},
"lsfg-vk-debug-engine", vk::version{2, 0, 0},
[opts](const vk::VulkanInstanceFuncs fi,
"lsfg-vk-debug", vk::version{2, 0, 0},
[opts, gpu_name = &gpu_name](const vk::VulkanInstanceFuncs fi,
const std::vector<VkPhysicalDevice>& devices) {
if (!opts.gpu.has_value())
return devices.front();
for (const VkPhysicalDevice& device : devices) {
VkPhysicalDeviceProperties2 props{
.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2
@ -112,87 +118,108 @@ int debug::run(const Options& opts) {
auto& properties = props.properties;
std::array<char, 256> devname = std::to_array(properties.deviceName);
devname.at(255) = '\0'; // ensure null-termination
devname.at(255) = '\0'; // Ensure null-termination
if (std::string(devname.data()) == *opts.gpu)
if (!opts.gpu || std::string(devname.data()) == *opts.gpu) {
*gpu_name = std::string(devname.data());
return device;
}
}
throw ls::error("failed to find specified GPU: " + *opts.gpu);
throw ls::error("Failed to find specified GPU: " + *opts.gpu);
}
};
std::pair<int, int> srcfds{};
const vk::Image frame_0{vk,
extent, VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
std::nullopt, &srcfds.first};
const vk::Image frame_1{vk,
extent, VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
std::nullopt, &srcfds.second};
std::vector<vk::Image> destimgs{};
std::vector<int> destfds{};
for (int i = 0; i < (opts.multiplier - 1); i++) {
int fd{};
destimgs.emplace_back(vk,
extent, VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
std::nullopt,
&fd
);
destfds.push_back(fd);
}
int syncfd{};
const vk::TimelineSemaphore sync{vk, 0, std::nullopt, &syncfd};
// initialize backend
// Initialize backend
std::string dll{};
if (opts.dll.has_value())
dll = *opts.dll;
else
dll = ls::findShaderDll();
lsfgvk::backend::Instance lsfgvk{
[opts](
const std::string& gpu_name,
std::pair<const std::string&, const std::string&>,
const std::optional<std::string>&
) {
return opts.gpu.value_or(gpu_name) == gpu_name;
},
dll, opts.allow_fp16
};
lsfgvk::backend::Context& lsfgvk_ctx = lsfgvk.openContext(
srcfds, destfds,
syncfd, extent.width, extent.height,
false, 1.0F / opts.flow, opts.performance_mode
);
// render destination images
size_t idx{1};
for (size_t j = 0; j < paths.size(); j++) {
upload_image(vk,
j % 2 == 0 ? frame_0 : frame_1,
paths.at(j).string()
const lsfgvk::Instance lsfgvk{
gpu_name,
dll,
opts.allow_fp16
};
lsfgvk::Context lsfgvk_ctx{
lsfgvk,
extent.width, extent.height,
opts.flow, opts.performance_mode
};
// Import resources
const auto fds{lsfgvk_ctx.exportFds()};
const vk::Image source{vk,
extent,
VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
fds.sourceFd, std::nullopt, 2
};
const vk::Image destination{vk,
extent,
VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
fds.destinationFd
};
const vk::TimelineSemaphore sync{vk,
0,
fds.syncFd
};
// Try to open RenderDoc
RENDERDOC_API_1_6_0* rdoc_api{nullptr};
RENDERDOC_DevicePointer rdoc_device{nullptr};
if (void* module = dlopen("librenderdoc.so", RTLD_NOW | RTLD_NOLOAD)) {
void* func{dlsym(module, "RENDERDOC_GetAPI")};
auto* GetAPI{reinterpret_cast<pRENDERDOC_GetAPI>(func)}; // NOLINT (unsafe cast)
GetAPI(
eRENDERDOC_API_Version_1_0_0,
reinterpret_cast<void**>(&rdoc_api) // NOLINT (unsafe cast)
);
sync.signal(vk, idx++);
lsfgvk.scheduleFrames(lsfgvk_ctx);
for (size_t i = 0; i < destimgs.size(); i++) {
auto success = sync.wait(vk, idx++);
if (!success)
throw ls::error("failed to wait for frame");
}
rdoc_device = RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(lsfgvk._instance());
}
// deinitialize lsfg-vk
lsfgvk.closeContext(lsfgvk_ctx);
// Render destination images
const uint32_t total{static_cast<uint32_t>(opts.multiplier) - 1U};
size_t idx{1};
for (size_t j = 0; j < paths.size(); j++) {
uploadDDS(vk, source, paths.at(j).string(), j % 2);
if (rdoc_api) {
rdoc_api->StartFrameCapture(rdoc_device, nullptr);
}
std::thread signal_thread{[&sync, &vk, &idx, total] {
for (size_t i = 0; i < total; i++) {
sync.signal(vk, idx++);
auto success = sync.wait(vk, idx++);
if (!success)
throw ls::error("Failed to wait for frame");
}
}};
lsfgvk_ctx.dispatch(total);
if (rdoc_api) {
lsfgvk_ctx.idle();
rdoc_api->EndFrameCapture(rdoc_device, nullptr);
}
signal_thread.join();
}
// Wait for idle
lsfgvk_ctx.idle();
return EXIT_SUCCESS;
} catch (const std::exception& e) {
std::cerr << "error: " << e.what() << "\n";
std::cerr << "Error: " << e.what() << "\n";
return EXIT_FAILURE;
}
}

View file

@ -8,23 +8,29 @@
namespace lsfgvk::cli::debug {
/// options for the "debug" command
///
/// Options for the "debug" command
///
struct Options {
std::optional<std::string> dll;
bool allow_fp16{true};
bool allow_fp16{false};
int width{1920};
int height{1080};
float flow{0.85F};
float flow{1.0F};
int multiplier{2};
bool performance_mode{true};
bool performance_mode{false};
std::optional<std::string> gpu;
std::filesystem::path path;
};
/// run the "debug" command
/// @param opts the command options
///
/// Run the "debug" command
///
/// @param opts Command options
/// @return Exit code
///
int run(const Options& opts);
}

View file

@ -16,7 +16,7 @@ int validate::run(const Options& opts) {
path = *opts.config;
if (!std::filesystem::exists(path)) {
std::cerr << "Validation failed: configuration file does not exist\n";
std::cerr << "Validation failed: Configuration file does not exist\n";
return 1;
}

View file

@ -7,13 +7,19 @@
namespace lsfgvk::cli::validate {
/// options for the "validate" command
///
/// Options for the "validate" command
///
struct Options {
std::optional<std::string> config;
};
/// run the "validate" command
/// @param opts the command options
///
/// Run the "validate" command
///
/// @param opts Command options
/// @return Exit code
///
int run(const Options& opts);
}

View file

@ -0,0 +1,875 @@
/******************************************************************************
* The MIT License (MIT)
*
* Copyright (c) 2015-2026 Baldur Karlsson
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
******************************************************************************/
#pragma once
//////////////////////////////////////////////////////////////////////////////////////////////////
//
// Documentation for the API is available at https://renderdoc.org/docs/in_application_api.html
//
#if !defined(RENDERDOC_NO_STDINT)
#include <stdint.h>
#endif
#if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER)
#define RENDERDOC_CC __cdecl
#elif defined(__linux__) || defined(__FreeBSD__) || defined(__sun__) || defined(__OpenBSD__)
#define RENDERDOC_CC
#elif defined(__APPLE__)
#define RENDERDOC_CC
#else
#error "Unknown platform"
#endif
#ifdef __cplusplus
extern "C" {
#endif
//////////////////////////////////////////////////////////////////////////////////////////////////
// Constants not used directly in below API
// This is a GUID/magic value used for when applications pass a path where shader debug
// information can be found to match up with a stripped shader.
// the define can be used like so: const GUID RENDERDOC_ShaderDebugMagicValue =
// RENDERDOC_ShaderDebugMagicValue_value
#define RENDERDOC_ShaderDebugMagicValue_struct \
{ \
0xeab25520, 0x6670, 0x4865, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff \
}
// as an alternative when you want a byte array (assuming x86 endianness):
#define RENDERDOC_ShaderDebugMagicValue_bytearray \
{ \
0x20, 0x55, 0xb2, 0xea, 0x70, 0x66, 0x65, 0x48, 0x84, 0x29, 0x6c, 0x8, 0x51, 0x54, 0x00, 0xff \
}
// truncated version when only a uint64_t is available (e.g. Vulkan tags):
#define RENDERDOC_ShaderDebugMagicValue_truncated 0x48656670eab25520ULL
// this is a magic value for vulkan user tags to indicate which dispatchable API objects are which
// for object annotations
#define RENDERDOC_APIObjectAnnotationHelper 0xfbb3b337b664d0adULL
//////////////////////////////////////////////////////////////////////////////////////////////////
// RenderDoc capture options
//
typedef enum RENDERDOC_CaptureOption
{
// Allow the application to enable vsync
//
// Default - enabled
//
// 1 - The application can enable or disable vsync at will
// 0 - vsync is force disabled
eRENDERDOC_Option_AllowVSync = 0,
// Allow the application to enable fullscreen
//
// Default - enabled
//
// 1 - The application can enable or disable fullscreen at will
// 0 - fullscreen is force disabled
eRENDERDOC_Option_AllowFullscreen = 1,
// Record API debugging events and messages
//
// Default - disabled
//
// 1 - Enable built-in API debugging features and records the results into
// the capture, which is matched up with events on replay
// 0 - no API debugging is forcibly enabled
eRENDERDOC_Option_APIValidation = 2,
eRENDERDOC_Option_DebugDeviceMode = 2, // deprecated name of this enum
// Capture CPU callstacks for API events
//
// Default - disabled
//
// 1 - Enables capturing of callstacks
// 0 - no callstacks are captured
eRENDERDOC_Option_CaptureCallstacks = 3,
// When capturing CPU callstacks, only capture them from actions.
// This option does nothing without the above option being enabled
//
// Default - disabled
//
// 1 - Only captures callstacks for actions.
// Ignored if CaptureCallstacks is disabled
// 0 - Callstacks, if enabled, are captured for every event.
eRENDERDOC_Option_CaptureCallstacksOnlyDraws = 4,
eRENDERDOC_Option_CaptureCallstacksOnlyActions = 4,
// Specify a delay in seconds to wait for a debugger to attach, after
// creating or injecting into a process, before continuing to allow it to run.
//
// 0 indicates no delay, and the process will run immediately after injection
//
// Default - 0 seconds
//
eRENDERDOC_Option_DelayForDebugger = 5,
// Verify buffer access. This includes checking the memory returned by a Map() call to
// detect any out-of-bounds modification, as well as initialising buffers with undefined contents
// to a marker value to catch use of uninitialised memory.
//
// NOTE: This option is only valid for OpenGL and D3D11. Explicit APIs such as D3D12 and Vulkan do
// not do the same kind of interception & checking and undefined contents are really undefined.
//
// Default - disabled
//
// 1 - Verify buffer access
// 0 - No verification is performed, and overwriting bounds may cause crashes or corruption in
// RenderDoc.
eRENDERDOC_Option_VerifyBufferAccess = 6,
// The old name for eRENDERDOC_Option_VerifyBufferAccess was eRENDERDOC_Option_VerifyMapWrites.
// This option now controls the filling of uninitialised buffers with 0xdddddddd which was
// previously always enabled
eRENDERDOC_Option_VerifyMapWrites = eRENDERDOC_Option_VerifyBufferAccess,
// Hooks any system API calls that create child processes, and injects
// RenderDoc into them recursively with the same options.
//
// Default - disabled
//
// 1 - Hooks into spawned child processes
// 0 - Child processes are not hooked by RenderDoc
eRENDERDOC_Option_HookIntoChildren = 7,
// By default RenderDoc only includes resources in the final capture necessary
// for that frame, this allows you to override that behaviour.
//
// Default - disabled
//
// 1 - all live resources at the time of capture are included in the capture
// and available for inspection
// 0 - only the resources referenced by the captured frame are included
eRENDERDOC_Option_RefAllResources = 8,
// **NOTE**: As of RenderDoc v1.1 this option has been deprecated. Setting or
// getting it will be ignored, to allow compatibility with older versions.
// In v1.1 the option acts as if it's always enabled.
//
// By default RenderDoc skips saving initial states for resources where the
// previous contents don't appear to be used, assuming that writes before
// reads indicate previous contents aren't used.
//
// Default - disabled
//
// 1 - initial contents at the start of each captured frame are saved, even if
// they are later overwritten or cleared before being used.
// 0 - unless a read is detected, initial contents will not be saved and will
// appear as black or empty data.
eRENDERDOC_Option_SaveAllInitials = 9,
// In APIs that allow for the recording of command lists to be replayed later,
// RenderDoc may choose to not capture command lists before a frame capture is
// triggered, to reduce overheads. This means any command lists recorded once
// and replayed many times will not be available and may cause a failure to
// capture.
//
// NOTE: This is only true for APIs where multithreading is difficult or
// discouraged. Newer APIs like Vulkan and D3D12 will ignore this option
// and always capture all command lists since the API is heavily oriented
// around it and the overheads have been reduced by API design.
//
// 1 - All command lists are captured from the start of the application
// 0 - Command lists are only captured if their recording begins during
// the period when a frame capture is in progress.
eRENDERDOC_Option_CaptureAllCmdLists = 10,
// Mute API debugging output when the API validation mode option is enabled
//
// Default - enabled
//
// 1 - Mute any API debug messages from being displayed or passed through
// 0 - API debugging is displayed as normal
eRENDERDOC_Option_DebugOutputMute = 11,
// Option to allow vendor extensions to be used even when they may be
// incompatible with RenderDoc and cause corrupted replays or crashes.
//
// Default - inactive
//
// No values are documented, this option should only be used when absolutely
// necessary as directed by a RenderDoc developer.
eRENDERDOC_Option_AllowUnsupportedVendorExtensions = 12,
// Define a soft memory limit which some APIs may aim to keep overhead under where
// possible. Anything above this limit will where possible be saved directly to disk during
// capture.
// This will cause increased disk space use (which may cause a capture to fail if disk space is
// exhausted) as well as slower capture times.
//
// Not all memory allocations may be deferred like this so it is not a guarantee of a memory
// limit.
//
// Units are in MBs, suggested values would range from 200MB to 1000MB.
//
// Default - 0 Megabytes
eRENDERDOC_Option_SoftMemoryLimit = 13,
} RENDERDOC_CaptureOption;
// Sets an option that controls how RenderDoc behaves on capture.
//
// Returns 1 if the option and value are valid
// Returns 0 if either is invalid and the option is unchanged
typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionU32)(RENDERDOC_CaptureOption opt, uint32_t val);
// Same return convention as SetCaptureOptionU32, for options taking a float value
typedef int(RENDERDOC_CC *pRENDERDOC_SetCaptureOptionF32)(RENDERDOC_CaptureOption opt, float val);
// Gets the current value of an option as a uint32_t
//
// If the option is invalid, 0xffffffff is returned
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionU32)(RENDERDOC_CaptureOption opt);
// Gets the current value of an option as a float
//
// If the option is invalid, -FLT_MAX is returned
typedef float(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionF32)(RENDERDOC_CaptureOption opt);
typedef enum RENDERDOC_InputButton
{
// '0' - '9' matches ASCII values
eRENDERDOC_Key_0 = 0x30,
eRENDERDOC_Key_1 = 0x31,
eRENDERDOC_Key_2 = 0x32,
eRENDERDOC_Key_3 = 0x33,
eRENDERDOC_Key_4 = 0x34,
eRENDERDOC_Key_5 = 0x35,
eRENDERDOC_Key_6 = 0x36,
eRENDERDOC_Key_7 = 0x37,
eRENDERDOC_Key_8 = 0x38,
eRENDERDOC_Key_9 = 0x39,
// 'A' - 'Z' matches ASCII values
eRENDERDOC_Key_A = 0x41,
eRENDERDOC_Key_B = 0x42,
eRENDERDOC_Key_C = 0x43,
eRENDERDOC_Key_D = 0x44,
eRENDERDOC_Key_E = 0x45,
eRENDERDOC_Key_F = 0x46,
eRENDERDOC_Key_G = 0x47,
eRENDERDOC_Key_H = 0x48,
eRENDERDOC_Key_I = 0x49,
eRENDERDOC_Key_J = 0x4A,
eRENDERDOC_Key_K = 0x4B,
eRENDERDOC_Key_L = 0x4C,
eRENDERDOC_Key_M = 0x4D,
eRENDERDOC_Key_N = 0x4E,
eRENDERDOC_Key_O = 0x4F,
eRENDERDOC_Key_P = 0x50,
eRENDERDOC_Key_Q = 0x51,
eRENDERDOC_Key_R = 0x52,
eRENDERDOC_Key_S = 0x53,
eRENDERDOC_Key_T = 0x54,
eRENDERDOC_Key_U = 0x55,
eRENDERDOC_Key_V = 0x56,
eRENDERDOC_Key_W = 0x57,
eRENDERDOC_Key_X = 0x58,
eRENDERDOC_Key_Y = 0x59,
eRENDERDOC_Key_Z = 0x5A,
// leave the rest of the ASCII range free
// in case we want to use it later
// non-printable keys start at 0x100 and are numbered sequentially from here
eRENDERDOC_Key_NonPrintable = 0x100,
eRENDERDOC_Key_Divide,
eRENDERDOC_Key_Multiply,
eRENDERDOC_Key_Subtract,
eRENDERDOC_Key_Plus,
eRENDERDOC_Key_F1,
eRENDERDOC_Key_F2,
eRENDERDOC_Key_F3,
eRENDERDOC_Key_F4,
eRENDERDOC_Key_F5,
eRENDERDOC_Key_F6,
eRENDERDOC_Key_F7,
eRENDERDOC_Key_F8,
eRENDERDOC_Key_F9,
eRENDERDOC_Key_F10,
eRENDERDOC_Key_F11,
eRENDERDOC_Key_F12,
eRENDERDOC_Key_Home,
eRENDERDOC_Key_End,
eRENDERDOC_Key_Insert,
eRENDERDOC_Key_Delete,
eRENDERDOC_Key_PageUp,
eRENDERDOC_Key_PageDn,
eRENDERDOC_Key_Backspace,
eRENDERDOC_Key_Tab,
eRENDERDOC_Key_PrtScrn,
eRENDERDOC_Key_Pause,
// sentinel — one past the last defined key, not a real key itself
eRENDERDOC_Key_Max,
} RENDERDOC_InputButton;
// Sets which key or keys can be used to toggle focus between multiple windows
//
// If keys is NULL or num is 0, toggle keys will be disabled
typedef void(RENDERDOC_CC *pRENDERDOC_SetFocusToggleKeys)(RENDERDOC_InputButton *keys, int num);
// Sets which key or keys can be used to capture the next frame
//
// If keys is NULL or num is 0, captures keys will be disabled
typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureKeys)(RENDERDOC_InputButton *keys, int num);
typedef enum RENDERDOC_OverlayBits
{
// Bits may be combined with bitwise OR (see eRENDERDOC_Overlay_Default below)
// This single bit controls whether the overlay is enabled or disabled globally
eRENDERDOC_Overlay_Enabled = 0x1,
// Show the average framerate over several seconds as well as min/max
eRENDERDOC_Overlay_FrameRate = 0x2,
// Show the current frame number
eRENDERDOC_Overlay_FrameNumber = 0x4,
// Show a list of recent captures, and how many captures have been made
eRENDERDOC_Overlay_CaptureList = 0x8,
// Default values for the overlay mask
eRENDERDOC_Overlay_Default = (eRENDERDOC_Overlay_Enabled | eRENDERDOC_Overlay_FrameRate |
eRENDERDOC_Overlay_FrameNumber | eRENDERDOC_Overlay_CaptureList),
// Enable all bits
eRENDERDOC_Overlay_All = 0x7ffffff,
// Disable all bits
eRENDERDOC_Overlay_None = 0,
} RENDERDOC_OverlayBits;
// returns the overlay bits that have been set
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetOverlayBits)(void);
// sets the overlay bits with an and & or mask
typedef void(RENDERDOC_CC *pRENDERDOC_MaskOverlayBits)(uint32_t And, uint32_t Or);
// this function will attempt to remove RenderDoc's hooks in the application.
//
// Note: that this can only work correctly if done immediately after
// the module is loaded, before any API work happens. RenderDoc will remove its
// injected hooks and shut down. Behaviour is undefined if this is called
// after any API functions have been called, and there is still no guarantee of
// success.
typedef void(RENDERDOC_CC *pRENDERDOC_RemoveHooks)(void);
// DEPRECATED: compatibility for code compiled against pre-1.4.1 headers.
typedef pRENDERDOC_RemoveHooks pRENDERDOC_Shutdown;
// This function will unload RenderDoc's crash handler.
//
// If you use your own crash handler and don't want RenderDoc's handler to
// intercede, you can call this function to unload it and any unhandled
// exceptions will pass to the next handler.
typedef void(RENDERDOC_CC *pRENDERDOC_UnloadCrashHandler)(void);
// Sets the capture file path template
//
// pathtemplate is a UTF-8 string that gives a template for how captures will be named
// and where they will be saved.
//
// Any extension is stripped off the path, and captures are saved in the directory
// specified, and named with the filename and the frame number appended. If the
// directory does not exist it will be created, including any parent directories.
//
// If pathtemplate is NULL, the template will remain unchanged
//
// Example:
//
// SetCaptureFilePathTemplate("my_captures/example");
//
// Capture #1 -> my_captures/example_frame123.rdc
// Capture #2 -> my_captures/example_frame456.rdc
typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFilePathTemplate)(const char *pathtemplate);
// returns the current capture path template, see SetCaptureFileTemplate above, as a UTF-8 string
typedef const char *(RENDERDOC_CC *pRENDERDOC_GetCaptureFilePathTemplate)(void);
// DEPRECATED: compatibility for code compiled against pre-1.1.2 headers.
typedef pRENDERDOC_SetCaptureFilePathTemplate pRENDERDOC_SetLogFilePathTemplate;
typedef pRENDERDOC_GetCaptureFilePathTemplate pRENDERDOC_GetLogFilePathTemplate;
// returns the number of captures that have been made
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetNumCaptures)(void);
// This function returns the details of a capture, by index. New captures are added
// to the end of the list.
//
// filename will be filled with the absolute path to the capture file, as a UTF-8 string
// pathlength will be written with the length in bytes of the filename string
// timestamp will be written with the time of the capture, in seconds since the Unix epoch
//
// Any of the parameters can be NULL and they'll be skipped.
//
// The function will return 1 if the capture index is valid, or 0 if the index is invalid
// If the index is invalid, the values will be unchanged
//
// Note: when captures are deleted in the UI they will remain in this list, so the
// capture path may not exist anymore.
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCapture)(uint32_t idx, char *filename,
uint32_t *pathlength, uint64_t *timestamp);
// Sets the comments associated with a capture file. These comments are displayed in the
// UI program when opening.
//
// filePath should be a path to the capture file to add comments to. If set to NULL or ""
// the most recently created capture file will be used instead.
// comments should be a NULL-terminated UTF-8 string to add as comments.
//
// Any existing comments will be overwritten.
typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureFileComments)(const char *filePath,
const char *comments);
// returns 1 if the RenderDoc UI is connected to this application, 0 otherwise
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsTargetControlConnected)(void);
// DEPRECATED: compatibility for code compiled against pre-1.1.1 headers.
// This was renamed to IsTargetControlConnected in API 1.1.1, the old typedef is kept here for
// backwards compatibility with old code, it is castable either way since it's ABI compatible
// as the same function pointer type.
typedef pRENDERDOC_IsTargetControlConnected pRENDERDOC_IsRemoteAccessConnected;
// This function will launch the Replay UI associated with the RenderDoc library injected
// into the running application.
//
// if connectTargetControl is 1, the Replay UI will be launched with a command line parameter
// to connect to this application
// cmdline is the rest of the command line, as a UTF-8 string. E.g. a capture file to open
// if cmdline is NULL, the command line will be empty.
//
// returns the PID of the replay UI if successful, 0 if not successful.
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_LaunchReplayUI)(uint32_t connectTargetControl,
const char *cmdline);
// RenderDoc can return a higher version than requested if it's backwards compatible,
// this function returns the actual version returned. If a parameter is NULL, it will be
// ignored and the others will be filled out.
typedef void(RENDERDOC_CC *pRENDERDOC_GetAPIVersion)(int *major, int *minor, int *patch);
// Requests that the replay UI show itself (if hidden or not the current top window). This can be
// used in conjunction with IsTargetControlConnected and LaunchReplayUI to intelligently handle
// showing the UI after making a capture.
//
// This will return 1 if the request was successfully passed on, though it's not guaranteed that
// the UI will be on top in all cases depending on OS rules. It will return 0 if there is no current
// target control connection to make such a request, or if there was another error
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)(void);
//////////////////////////////////////////////////////////////////////////
// Capturing functions
//
// A device pointer is a pointer to the API's root handle.
//
// This would be an ID3D11Device, HGLRC/GLXContext, ID3D12Device, etc
typedef void *RENDERDOC_DevicePointer;
// A window handle is the OS's native window handle
//
// This would be an HWND, GLXDrawable, etc
typedef void *RENDERDOC_WindowHandle;
// A helper macro for Vulkan, where the device handle cannot be used directly.
//
// Passing the VkInstance to this macro will return the RENDERDOC_DevicePointer to use.
//
// Specifically, the value needed is the dispatch table pointer, which sits as the first
// pointer-sized object in the memory pointed to by the VkInstance. Thus we cast to a void** and
// indirect once.
#define RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(inst) (*((void **)(inst)))
// This sets the RenderDoc in-app overlay in the API/window pair as 'active' and it will
// respond to keypresses. Neither parameter can be NULL
typedef void(RENDERDOC_CC *pRENDERDOC_SetActiveWindow)(RENDERDOC_DevicePointer device,
RENDERDOC_WindowHandle wndHandle);
// capture the next frame on whichever window and API is currently considered active
typedef void(RENDERDOC_CC *pRENDERDOC_TriggerCapture)(void);
// capture the next N frames on whichever window and API is currently considered active
typedef void(RENDERDOC_CC *pRENDERDOC_TriggerMultiFrameCapture)(uint32_t numFrames);
// When choosing either a device pointer or a window handle to capture, you can pass NULL.
// Passing NULL specifies a 'wildcard' match against anything. This allows you to specify
// any API rendering to a specific window, or a specific API instance rendering to any window,
// or in the simplest case of one window and one API, you can just pass NULL for both.
//
// In either case, if there are two or more possible matching (device,window) pairs it
// is undefined which one will be captured.
//
// Note: for headless rendering you can pass NULL for the window handle and either specify
// a device pointer or leave it NULL as above.
// Immediately starts capturing API calls on the specified device pointer and window handle.
//
// If there is no matching thing to capture (e.g. no supported API has been initialised),
// this will do nothing.
//
// The results are undefined (including crashes) if two captures are started overlapping,
// even on separate devices and/or windows.
typedef void(RENDERDOC_CC *pRENDERDOC_StartFrameCapture)(RENDERDOC_DevicePointer device,
RENDERDOC_WindowHandle wndHandle);
// Returns whether or not a frame capture is currently ongoing anywhere.
//
// This will return 1 if a capture is ongoing, and 0 if there is no capture running
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_IsFrameCapturing)(void);
// Ends capturing immediately.
//
// This will return 1 if the capture succeeded, and 0 if there was an error capturing.
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_EndFrameCapture)(RENDERDOC_DevicePointer device,
RENDERDOC_WindowHandle wndHandle);
// Ends capturing immediately and discard any data stored without saving to disk.
//
// This will return 1 if the capture was discarded, and 0 if there was an error or no capture
// was in progress
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_DiscardFrameCapture)(RENDERDOC_DevicePointer device,
RENDERDOC_WindowHandle wndHandle);
// Only valid to be called between a call to StartFrameCapture and EndFrameCapture. Gives a custom
// title to the capture produced which will be displayed in the UI.
//
// If multiple captures are ongoing, this title will be applied to the first capture to end after
// this call. The second capture to end will have no title, unless this function is called again.
//
// Calling this function has no effect if no capture is currently running, and if it is called
// multiple times only the last title will be used.
typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureTitle)(const char *title);
// Annotations API:
//
// These functions allow you to specify annotations either on a per-command level, or a per-object
// level.
//
// Basic types of annotations are supported, as well as vector versions and references to API objects.
//
// The annotations are stored as keys, with the key being a dot-separated path allowing arbitrary
// nesting and user organisation. The keys are sorted in human order so `foo.2.bar` will be displayed
// before `foo.10.bar` to allow creation of arrays if desired.
//
// Deleting an annotation can be done by assigning an empty value to it.
// the type of an annotation value, or Empty to delete an annotation
typedef enum RENDERDOC_AnnotationType
{
// no value — assigning Empty deletes an existing annotation
eRENDERDOC_Empty,
eRENDERDOC_Bool,
eRENDERDOC_Int32,
eRENDERDOC_UInt32,
eRENDERDOC_Int64,
eRENDERDOC_UInt64,
eRENDERDOC_Float,
eRENDERDOC_Double,
eRENDERDOC_String,
eRENDERDOC_APIObject,
// sentinel; typically also keeps the enum at a 32-bit underlying type
eRENDERDOC_AnnotationMax = 0x7FFFFFFF,
} RENDERDOC_AnnotationType;
// a union with vector annotation value data
typedef union RENDERDOC_AnnotationVectorValue
{
// 4-component variants of each scalar annotation type.
// NOTE(review): presumably only the first valueVectorWidth components are read
// (see SetObjectAnnotation/SetCommandAnnotation) — confirm against upstream docs.
bool boolean[4];
int32_t int32[4];
int64_t int64[4];
uint32_t uint32[4];
uint64_t uint64[4];
float float32[4];
double float64[4];
} RENDERDOC_AnnotationVectorValue;
// a union with scalar annotation value data
// the active member corresponds to the RENDERDOC_AnnotationType passed alongside it
typedef union RENDERDOC_AnnotationValue
{
bool boolean;
int32_t int32;
int64_t int64;
uint32_t uint32;
uint64_t uint64;
float float32;
double float64;
// vector-valued annotations, see RENDERDOC_AnnotationVectorValue
RENDERDOC_AnnotationVectorValue vector;
// string value for eRENDERDOC_String
const char *string;
// API object handle for eRENDERDOC_APIObject
void *apiObject;
} RENDERDOC_AnnotationValue;
// a struct for specifying a GL object, as we don't have pointers we can use so instead we specify a
// pointer to this struct giving both the type and the name
typedef struct RENDERDOC_GLResourceReference
{
// this is the same GLenum identifier as passed to glObjectLabel
uint32_t identifier;
// the GL object name, as passed to glObjectLabel
uint32_t name;
} GLResourceReference;
// simple C++ helpers to avoid the need for a temporary objects for value passing and GL object specification
#ifdef __cplusplus
struct RDGLObjectHelper
{
RENDERDOC_GLResourceReference gl;
RDGLObjectHelper(uint32_t identifier, uint32_t name)
{
gl.identifier = identifier;
gl.name = name;
}
operator RENDERDOC_GLResourceReference *() { return &gl; }
};
struct RDAnnotationHelper
{
RENDERDOC_AnnotationValue val;
RDAnnotationHelper(bool b) { val.boolean = b; }
RDAnnotationHelper(int32_t i) { val.int32 = i; }
RDAnnotationHelper(int64_t i) { val.int64 = i; }
RDAnnotationHelper(uint32_t i) { val.uint32 = i; }
RDAnnotationHelper(uint64_t i) { val.uint64 = i; }
RDAnnotationHelper(float f) { val.float32 = f; }
RDAnnotationHelper(double d) { val.float64 = d; }
RDAnnotationHelper(const char *s) { val.string = s; }
operator RENDERDOC_AnnotationValue *() { return &val; }
};
#endif
// The device is specified in the same way as other API calls that take a RENDERDOC_DevicePointer
// to specify the device.
//
// The object or queue/commandbuffer will depend on the graphics API in question.
//
// Return value:
// 0 - The annotation was applied successfully.
// 1 - The device is unknown/invalid
// 2 - The device is valid but the annotation is not supported for API-specific reasons, such as an
// unrecognised or invalid object or queue/commandbuffer
// 3 - The call is ill-formed or invalid e.g. empty is specified with a value pointer, or non-empty
// is specified with a NULL value pointer
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_SetObjectAnnotation)(RENDERDOC_DevicePointer device,
void *object, const char *key,
RENDERDOC_AnnotationType valueType,
uint32_t valueVectorWidth,
const RENDERDOC_AnnotationValue *value);
typedef uint32_t(RENDERDOC_CC *pRENDERDOC_SetCommandAnnotation)(
RENDERDOC_DevicePointer device, void *queueOrCommandBuffer, const char *key,
RENDERDOC_AnnotationType valueType, uint32_t valueVectorWidth,
const RENDERDOC_AnnotationValue *value);
//////////////////////////////////////////////////////////////////////////////////////////////////
// RenderDoc API versions
//
// RenderDoc uses semantic versioning (http://semver.org/).
//
// MAJOR version is incremented when incompatible API changes happen.
// MINOR version is incremented when functionality is added in a backwards-compatible manner.
// PATCH version is incremented when backwards-compatible bug fixes happen.
//
// Note that this means the API returned can be higher than the one you might have requested.
// e.g. if you are running against a newer RenderDoc that supports 1.0.1, it will be returned
// instead of 1.0.0. You can check this with the GetAPIVersion entry point
typedef enum RENDERDOC_Version
{
// values are packed as MAJOR * 10000 + MINOR * 100 + PATCH
eRENDERDOC_API_Version_1_0_0 = 10000, // RENDERDOC_API_1_0_0 = 1 00 00
eRENDERDOC_API_Version_1_0_1 = 10001, // RENDERDOC_API_1_0_1 = 1 00 01
eRENDERDOC_API_Version_1_0_2 = 10002, // RENDERDOC_API_1_0_2 = 1 00 02
eRENDERDOC_API_Version_1_1_0 = 10100, // RENDERDOC_API_1_1_0 = 1 01 00
eRENDERDOC_API_Version_1_1_1 = 10101, // RENDERDOC_API_1_1_1 = 1 01 01
eRENDERDOC_API_Version_1_1_2 = 10102, // RENDERDOC_API_1_1_2 = 1 01 02
eRENDERDOC_API_Version_1_2_0 = 10200, // RENDERDOC_API_1_2_0 = 1 02 00
eRENDERDOC_API_Version_1_3_0 = 10300, // RENDERDOC_API_1_3_0 = 1 03 00
eRENDERDOC_API_Version_1_4_0 = 10400, // RENDERDOC_API_1_4_0 = 1 04 00
eRENDERDOC_API_Version_1_4_1 = 10401, // RENDERDOC_API_1_4_1 = 1 04 01
eRENDERDOC_API_Version_1_4_2 = 10402, // RENDERDOC_API_1_4_2 = 1 04 02
eRENDERDOC_API_Version_1_5_0 = 10500, // RENDERDOC_API_1_5_0 = 1 05 00
eRENDERDOC_API_Version_1_6_0 = 10600, // RENDERDOC_API_1_6_0 = 1 06 00
eRENDERDOC_API_Version_1_7_0 = 10700, // RENDERDOC_API_1_7_0 = 1 07 00
} RENDERDOC_Version;
// API version changelog:
//
// 1.0.0 - initial release
// 1.0.1 - Bugfix: IsFrameCapturing() was returning false for captures that were triggered
// by keypress or TriggerCapture, instead of Start/EndFrameCapture.
// 1.0.2 - Refactor: Renamed eRENDERDOC_Option_DebugDeviceMode to eRENDERDOC_Option_APIValidation
// 1.1.0 - Add feature: TriggerMultiFrameCapture(). Backwards compatible with 1.0.x since the new
// function pointer is added to the end of the struct, the original layout is identical
// 1.1.1 - Refactor: Renamed remote access to target control (to better disambiguate from remote
// replay/remote server concept in replay UI)
// 1.1.2 - Refactor: Renamed "log file" in function names to just capture, to clarify that these
// are captures and not debug logging files. This is the first API version in the v1.0
// branch.
// 1.2.0 - Added feature: SetCaptureFileComments() to add comments to a capture file that will be
// displayed in the UI program on load.
// 1.3.0 - Added feature: New capture option eRENDERDOC_Option_AllowUnsupportedVendorExtensions
// which allows users to opt-in to allowing unsupported vendor extensions to function.
// Should be used at the user's own risk.
// Refactor: Renamed eRENDERDOC_Option_VerifyMapWrites to
// eRENDERDOC_Option_VerifyBufferAccess, which now also controls initialisation to
// 0xdddddddd of uninitialised buffer contents.
// 1.4.0 - Added feature: DiscardFrameCapture() to discard a frame capture in progress and stop
// capturing without saving anything to disk.
// 1.4.1 - Refactor: Renamed Shutdown to RemoveHooks to better clarify what is happening
// 1.4.2 - Refactor: Renamed 'draws' to 'actions' in callstack capture option.
// 1.5.0 - Added feature: ShowReplayUI() to request that the replay UI show itself if connected
// 1.6.0 - Added feature: SetCaptureTitle() which can be used to set a title for a
// capture made with StartFrameCapture() or EndFrameCapture()
// 1.7.0 - Added feature: SetObjectAnnotation() / SetCommandAnnotation() for adding rich
// annotations to objects and command streams
typedef struct RENDERDOC_API_1_7_0
{
// Members are laid out in the order functions were added to the API; new entries are
// only ever appended (see changelog above), so older API structs are a prefix of this one.
pRENDERDOC_GetAPIVersion GetAPIVersion;
pRENDERDOC_SetCaptureOptionU32 SetCaptureOptionU32;
pRENDERDOC_SetCaptureOptionF32 SetCaptureOptionF32;
pRENDERDOC_GetCaptureOptionU32 GetCaptureOptionU32;
pRENDERDOC_GetCaptureOptionF32 GetCaptureOptionF32;
pRENDERDOC_SetFocusToggleKeys SetFocusToggleKeys;
pRENDERDOC_SetCaptureKeys SetCaptureKeys;
pRENDERDOC_GetOverlayBits GetOverlayBits;
pRENDERDOC_MaskOverlayBits MaskOverlayBits;
// Shutdown was renamed to RemoveHooks in 1.4.1.
// These unions allow old code to continue compiling without changes
union
{
pRENDERDOC_Shutdown Shutdown;
pRENDERDOC_RemoveHooks RemoveHooks;
};
pRENDERDOC_UnloadCrashHandler UnloadCrashHandler;
// Get/SetLogFilePathTemplate was renamed to Get/SetCaptureFilePathTemplate in 1.1.2.
// These unions allow old code to continue compiling without changes
union
{
// deprecated name
pRENDERDOC_SetLogFilePathTemplate SetLogFilePathTemplate;
// current name
pRENDERDOC_SetCaptureFilePathTemplate SetCaptureFilePathTemplate;
};
union
{
// deprecated name
pRENDERDOC_GetLogFilePathTemplate GetLogFilePathTemplate;
// current name
pRENDERDOC_GetCaptureFilePathTemplate GetCaptureFilePathTemplate;
};
pRENDERDOC_GetNumCaptures GetNumCaptures;
pRENDERDOC_GetCapture GetCapture;
pRENDERDOC_TriggerCapture TriggerCapture;
// IsRemoteAccessConnected was renamed to IsTargetControlConnected in 1.1.1.
// This union allows old code to continue compiling without changes
union
{
// deprecated name
pRENDERDOC_IsRemoteAccessConnected IsRemoteAccessConnected;
// current name
pRENDERDOC_IsTargetControlConnected IsTargetControlConnected;
};
pRENDERDOC_LaunchReplayUI LaunchReplayUI;
pRENDERDOC_SetActiveWindow SetActiveWindow;
pRENDERDOC_StartFrameCapture StartFrameCapture;
pRENDERDOC_IsFrameCapturing IsFrameCapturing;
pRENDERDOC_EndFrameCapture EndFrameCapture;
// new function in 1.1.0
pRENDERDOC_TriggerMultiFrameCapture TriggerMultiFrameCapture;
// new function in 1.2.0
pRENDERDOC_SetCaptureFileComments SetCaptureFileComments;
// new function in 1.4.0
pRENDERDOC_DiscardFrameCapture DiscardFrameCapture;
// new function in 1.5.0
pRENDERDOC_ShowReplayUI ShowReplayUI;
// new function in 1.6.0
pRENDERDOC_SetCaptureTitle SetCaptureTitle;
// new functions in 1.7.0
pRENDERDOC_SetObjectAnnotation SetObjectAnnotation;
pRENDERDOC_SetCommandAnnotation SetCommandAnnotation;
} RENDERDOC_API_1_7_0;
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_0_0;
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_0_1;
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_0_2;
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_1_0;
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_1_1;
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_1_2;
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_2_0;
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_3_0;
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_4_0;
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_4_1;
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_4_2;
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_5_0;
typedef RENDERDOC_API_1_7_0 RENDERDOC_API_1_6_0;
//////////////////////////////////////////////////////////////////////////////////////////////////
// RenderDoc API entry point
//
// This entry point can be obtained via GetProcAddress/dlsym if RenderDoc is available.
//
// The name is the same as the typedef - "RENDERDOC_GetAPI"
//
// This function is not thread safe, and should not be called on multiple threads at once.
// Ideally, call this once as early as possible in your application's startup, before doing
// any API work, since some configuration functionality etc has to be done also before
// initialising any APIs.
//
// Parameters:
// version is a single value from the RENDERDOC_Version above.
//
// outAPIPointers will be filled out with a pointer to the corresponding struct of function
// pointers.
//
// Returns:
// 1 - if the outAPIPointers has been filled with a pointer to the API struct requested
// 0 - if the requested version is not supported or the arguments are invalid.
//
typedef int(RENDERDOC_CC *pRENDERDOC_GetAPI)(RENDERDOC_Version version, void **outAPIPointers);
#ifdef __cplusplus
} // extern "C"
#endif

View file

@ -23,7 +23,12 @@ target_include_directories(lsfg-vk-common
target_include_directories(lsfg-vk-common SYSTEM
PRIVATE thirdparty/include)
if(LSFGVK_INSTALL_DEVELOP)
target_compile_options(lsfg-vk-common PUBLIC
-Wno-cast-function-type-strict # Vulkan function pointers
-Wno-shadow # Shadowing variables used to be common practice
)
if(LSFGVK_INSTALL_LIBRARIES)
install(TARGETS lsfg-vk-common
ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")
install(DIRECTORY "include/lsfg-vk-common/"

View file

@ -6,7 +6,7 @@
namespace ls {
/// find the location of the Lossless.dll
/// find the location of the lsfg-vk.dll
/// @returns the path to the DLL
/// @throws ls::error if the DLL could not be found
std::filesystem::path findShaderDll();

View file

@ -21,7 +21,12 @@ namespace vk {
template<typename T>
Buffer(const vk::Vulkan& vk, const T& data,
VkBufferUsageFlags usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)
: Buffer(vk, reinterpret_cast<const void*>(&data), sizeof(T), usage) {}
: Buffer(
vk,
reinterpret_cast<const void*>(&data), // NOLINT (unsafe cast)
sizeof(T),
usage
) {}
/// create a buffer
/// @param vk the vulkan instance

View file

@ -42,7 +42,8 @@ namespace vk {
void blitImage(const vk::Vulkan& vk,
const std::vector<vk::Barrier>& preBarriers,
std::pair<VkImage, VkImage> images, VkExtent2D extent,
const std::vector<vk::Barrier>& postBarriers) const;
const std::vector<vk::Barrier>& postBarriers,
uint32_t srcLayer = 0, uint32_t dstLayer = 0) const;
/// insert a bunch of barriers
/// @param vk the vulkan instance
@ -68,7 +69,8 @@ namespace vk {
/// @param buffer the source buffer
/// @param image the destination image
void copyBufferToImage(const vk::Vulkan& vk,
const vk::Buffer& buffer, const vk::Image& image) const;
const vk::Buffer& buffer, const vk::Image& image,
uint32_t dstLayer = 0) const;
/// end recording commands
/// @param vk the vulkan instance

View file

@ -5,6 +5,7 @@
#include "../helpers/pointers.hpp"
#include "vulkan.hpp"
#include <cstdint>
#include <optional>
#include <vulkan/vulkan_core.h>
@ -26,7 +27,9 @@ namespace vk {
VkFormat format = VK_FORMAT_R8G8B8A8_UNORM,
VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
std::optional<int> importFd = std::nullopt,
std::optional<int*> exportFd = std::nullopt);
std::optional<int*> exportFd = std::nullopt,
uint32_t arrayLayers = 1
);
/// get the image handle
/// @return the image handle

View file

@ -30,7 +30,7 @@ void ConfigFile::createDefaultConfigFile(const std::filesystem::path& path) {
ofs << R"(version = 2
[global]
# dll = '/media/games/Lossless Scaling/Lossless.dll' # if you don't have LS in the default location
# dll = '/media/games/Lossless Scaling/lsfg-vk.dll' # if you don't have LS in the default location
allow_fp16 = true # this will improve give a MASSIVE performance boost on AMD, but be super slow on older (!) NVIDIA GPUs
[[profile]]

View file

@ -22,7 +22,7 @@ std::filesystem::path ls::findShaderDll() {
auto base = std::filesystem::path(xdgPath);
for (const auto& frag : FRAGMENTS) {
auto full = base / frag / "Lossless Scaling" / "Lossless.dll";
auto full = base / frag / "Lossless Scaling" / "lsfg-vk.dll";
if (std::filesystem::exists(full))
return full;
}
@ -34,16 +34,16 @@ std::filesystem::path ls::findShaderDll() {
auto base = std::filesystem::path(homePath);
for (const auto& frag : FRAGMENTS) {
auto full = base / frag / "Lossless Scaling" / "Lossless.dll";
auto full = base / frag / "Lossless Scaling" / "lsfg-vk.dll";
if (std::filesystem::exists(full))
return full;
}
}
// fallback to same directory
auto local = std::filesystem::current_path() / "Lossless.dll";
auto local = std::filesystem::current_path() / "lsfg-vk.dll";
if (std::filesystem::exists(local))
return local;
throw ls::error("unable to locate Lossless.dll, please set the path in the configuration");
throw ls::error("unable to locate lsfg-vk.dll, please set the path in the configuration");
}

View file

@ -105,7 +105,8 @@ void CommandBuffer::dispatch(const vk::Vulkan& vk,
void CommandBuffer::blitImage(const vk::Vulkan& vk,
const std::vector<vk::Barrier>& preBarriers,
std::pair<VkImage, VkImage> images, VkExtent2D extent,
const std::vector<vk::Barrier>& postBarriers) const {
const std::vector<vk::Barrier>& postBarriers,
uint32_t srcLayer, uint32_t dstLayer) const {
vk.df().CmdPipelineBarrier(*this->commandBuffer,
VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0,
@ -117,7 +118,8 @@ void CommandBuffer::blitImage(const vk::Vulkan& vk,
const VkImageBlit region{
.srcSubresource = {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.layerCount = 1
.baseArrayLayer = srcLayer,
.layerCount = 1,
},
.srcOffsets = {
{ 0, 0, 0 },
@ -126,6 +128,7 @@ void CommandBuffer::blitImage(const vk::Vulkan& vk,
},
.dstSubresource = {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseArrayLayer = dstLayer,
.layerCount = 1
},
.dstOffsets = {
@ -151,7 +154,8 @@ void CommandBuffer::blitImage(const vk::Vulkan& vk,
}
void CommandBuffer::copyBufferToImage(const vk::Vulkan& vk,
const vk::Buffer& buffer, const vk::Image& image) const {
const vk::Buffer& buffer, const vk::Image& image,
uint32_t dstLayer) const {
const VkImageMemoryBarrier barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_NONE,
@ -179,6 +183,7 @@ void CommandBuffer::copyBufferToImage(const vk::Vulkan& vk,
.bufferImageHeight = 0,
.imageSubresource = {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseArrayLayer = dstLayer,
.layerCount = 1
},
.imageExtent = {

View file

@ -6,6 +6,7 @@
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <bitset>
#include <cstdint>
#include <optional>
#include <vulkan/vulkan_core.h>
@ -16,7 +17,7 @@ namespace {
/// create a image
ls::owned_ptr<VkImage> createImage(const vk::Vulkan& vk,
VkExtent2D extent, VkFormat format, VkImageUsageFlags usage,
bool external) {
bool external, uint32_t arrayLayers) {
VkImage handle{};
const VkExternalMemoryImageCreateInfo externalInfo{
@ -34,7 +35,7 @@ namespace {
.depth = 1
},
.mipLevels = 1,
.arrayLayers = 1,
.arrayLayers = arrayLayers,
.samples = VK_SAMPLE_COUNT_1_BIT,
.usage = usage,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE
@ -121,20 +122,20 @@ namespace {
}
/// create an image view
ls::owned_ptr<VkImageView> createImageView(const vk::Vulkan& vk,
VkImage image, VkFormat format) {
VkImage image, VkFormat format, uint32_t arrayLayers) {
VkImageView handle{};
const VkImageViewCreateInfo viewInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.image = image,
.viewType = VK_IMAGE_VIEW_TYPE_2D,
.viewType = arrayLayers == 1 ? VK_IMAGE_VIEW_TYPE_2D : VK_IMAGE_VIEW_TYPE_2D_ARRAY,
.format = format,
.subresourceRange = {
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
.baseMipLevel = 0,
.levelCount = 1,
.baseArrayLayer = 0,
.layerCount = 1
.layerCount = arrayLayers
}
};
auto res = vk.df().CreateImageView(vk.dev(), &viewInfo, VK_NULL_HANDLE, &handle);
@ -155,10 +156,13 @@ Image::Image(const vk::Vulkan& vk,
VkFormat format,
VkImageUsageFlags usage,
std::optional<int> importFd,
std::optional<int*> exportFd) :
std::optional<int*> exportFd,
uint32_t arrayLayers
) :
image(createImage(vk,
extent, format, usage,
importFd.has_value() || exportFd.has_value()
importFd.has_value() || exportFd.has_value(),
arrayLayers
)),
memory(allocateMemory(vk,
*this->image,
@ -166,7 +170,8 @@ Image::Image(const vk::Vulkan& vk,
)),
view(createImageView(vk,
*this->image,
format
format,
arrayLayers
)),
extent(extent) {
}

View file

@ -29,4 +29,3 @@ Checks:
# Vulkan layers often require C-style memory access
- -cppcoreguidelines-pro-bounds-pointer-arithmetic
- -cppcoreguidelines-pro-type-union-access
- -clang-diagnostic-unsafe-buffer-usage

View file

@ -10,7 +10,6 @@ target_link_libraries(lsfg-vk-layer
PUBLIC lsfg-vk-backend)
target_compile_options(lsfg-vk-layer PRIVATE
-Wno-unknown-warning-option
-Wno-unsafe-buffer-usage) # Array indexing
set_target_properties(lsfg-vk-layer PROPERTIES

View file

@ -22,17 +22,17 @@
using namespace lsfgvk::layer;
namespace {
// global layer info initialized at layer negotiation
/// Global layer info initialized at layer negotiation
struct LayerInfo {
std::unordered_map<std::string, PFN_vkVoidFunction> map; //!< function pointer override map
std::unordered_map<std::string, PFN_vkVoidFunction> map; //!< Function pointer override map
PFN_vkGetInstanceProcAddr GetInstanceProcAddr;
Root root;
}* layer_info; // NOLINT (global variable)
// instance-wide info initialized at instance creation(s)
/// Instance-wide info initialized at instance creation(s)
struct InstanceInfo {
std::vector<VkInstance> handles; // there may be several instances
std::vector<VkInstance> handles; // There may be several instances
vk::VulkanInstanceFuncs funcs;
std::unordered_map<VkDevice, vk::Vulkan> devices;
@ -40,44 +40,44 @@ namespace {
std::unordered_map<VkSwapchainKHR, SwapchainInfo> swapchainInfos;
}* instance_info; // NOLINT (global variable)
// create instance
/// Create instance
VkResult myvkCreateInstance(
const VkInstanceCreateInfo* info,
const VkAllocationCallbacks* alloc,
VkInstance* instance) {
// apply layer chaining
// Apply layer chaining
auto* layerInfo = reinterpret_cast<VkLayerInstanceCreateInfo*>(const_cast<void*>(info->pNext));
while (layerInfo && (layerInfo->sType != VK_STRUCTURE_TYPE_LOADER_INSTANCE_CREATE_INFO
|| layerInfo->function != VK_LAYER_LINK_INFO)) {
layerInfo = reinterpret_cast<VkLayerInstanceCreateInfo*>(const_cast<void*>(layerInfo->pNext));
}
if (!layerInfo) {
std::cerr << "lsfg-vk: no layer info found in pNext chain, "
std::cerr << "lsfg-vk: No layer info found in pNext chain, "
"the previous layer does not follow spec\n";
return VK_ERROR_INITIALIZATION_FAILED;
}
auto* linkInfo = layerInfo->u.pLayerInfo;
if (!linkInfo) {
std::cerr << "lsfg-vk: link info is null, "
std::cerr << "lsfg-vk: Link info is null, "
"the previous layer does not follow spec\n";
return VK_ERROR_INITIALIZATION_FAILED;
}
layer_info->GetInstanceProcAddr = linkInfo->pfnNextGetInstanceProcAddr;
if (!layer_info->GetInstanceProcAddr) {
std::cerr << "lsfg-vk: next layer's vkGetInstanceProcAddr is null, "
std::cerr << "lsfg-vk: Next layer's vkGetInstanceProcAddr is null, "
"the previous layer does not follow spec\n";
return VK_ERROR_INITIALIZATION_FAILED;
}
layerInfo->u.pLayerInfo = linkInfo->pNext; // advance for next layer
layerInfo->u.pLayerInfo = linkInfo->pNext; // Advance for next layer
// create instance
// Create instance
auto* vkCreateInstance = reinterpret_cast<PFN_vkCreateInstance>(
layer_info->GetInstanceProcAddr(VK_NULL_HANDLE, "vkCreateInstance"));
if (!vkCreateInstance) {
std::cerr << "lsfg-vk: failed to get next layer's vkCreateInstance, "
std::cerr << "lsfg-vk: Failed to get next layer's vkCreateInstance, "
"the previous layer does not follow spec\n";
return VK_ERROR_INITIALIZATION_FAILED;
}
@ -103,64 +103,64 @@ namespace {
return VK_SUCCESS;
} catch (const ls::vulkan_error& e) {
if (e.error() == VK_ERROR_EXTENSION_NOT_PRESENT)
std::cerr << "lsfg-vk: required Vulkan instance extensions are not present. "
std::cerr << "lsfg-vk: Required Vulkan instance extensions are not present. "
"Your GPU driver is not supported.\n";
return e.error();
}
}
// create device
/// Create device
VkResult myvkCreateDevice(
VkPhysicalDevice physdev,
const VkDeviceCreateInfo* info,
const VkAllocationCallbacks* alloc,
VkDevice* device) {
// apply layer chaining
// Apply layer chaining
auto* layerInfo = reinterpret_cast<VkLayerDeviceCreateInfo*>(const_cast<void*>(info->pNext));
while (layerInfo && (layerInfo->sType != VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO
|| layerInfo->function != VK_LAYER_LINK_INFO)) {
layerInfo = reinterpret_cast<VkLayerDeviceCreateInfo*>(const_cast<void*>(layerInfo->pNext));
}
if (!layerInfo) {
std::cerr << "lsfg-vk: no layer info found in pNext chain, "
std::cerr << "lsfg-vk: No layer info found in pNext chain, "
"the previous layer does not follow spec\n";
return VK_ERROR_INITIALIZATION_FAILED;
}
auto* linkInfo = layerInfo->u.pLayerInfo;
if (!linkInfo) {
std::cerr << "lsfg-vk: link info is null, "
std::cerr << "lsfg-vk: Link info is null, "
"the previous layer does not follow spec\n";
return VK_ERROR_INITIALIZATION_FAILED;
}
instance_info->funcs.GetDeviceProcAddr = linkInfo->pfnNextGetDeviceProcAddr;
if (!linkInfo->pfnNextGetDeviceProcAddr) {
std::cerr << "lsfg-vk: next layer's vkGetDeviceProcAddr is null, "
std::cerr << "lsfg-vk: Next layer's vkGetDeviceProcAddr is null, "
"the previous layer does not follow spec\n";
return VK_ERROR_INITIALIZATION_FAILED;
}
layerInfo->u.pLayerInfo = linkInfo->pNext; // advance for next layer
layerInfo->u.pLayerInfo = linkInfo->pNext; // Advance for next layer
// fetch device loader functions
// Fetch device loader functions
layerInfo = reinterpret_cast<VkLayerDeviceCreateInfo*>(const_cast<void*>(info->pNext));
while (layerInfo && (layerInfo->sType != VK_STRUCTURE_TYPE_LOADER_DEVICE_CREATE_INFO
|| layerInfo->function != VK_LOADER_DATA_CALLBACK)) {
layerInfo = reinterpret_cast<VkLayerDeviceCreateInfo*>(const_cast<void*>(layerInfo->pNext));
}
if (!layerInfo) {
std::cerr << "lsfg-vk: no layer loader data found in pNext chain.\n";
std::cerr << "lsfg-vk: No layer loader data found in pNext chain.\n";
return VK_ERROR_INITIALIZATION_FAILED;
}
auto* setLoaderData = layerInfo->u.pfnSetDeviceLoaderData;
if (!setLoaderData) {
std::cerr << "lsfg-vk: instance loader data function is null.\n";
std::cerr << "lsfg-vk: Instance loader data function is null.\n";
return VK_ERROR_INITIALIZATION_FAILED;
}
// create device
// Create device
try {
VkDeviceCreateInfo newInfo = *info;
layer_info->root.modifyDeviceCreateInfo(newInfo,
@ -172,12 +172,12 @@ namespace {
);
} catch (const ls::vulkan_error& e) {
if (e.error() == VK_ERROR_EXTENSION_NOT_PRESENT)
std::cerr << "lsfg-vk: required Vulkan device extensions are not present. "
std::cerr << "lsfg-vk: Required Vulkan device extensions are not present. "
"Your GPU driver is not supported.\n";
return e.error();
}
// create layer instance
// Create layer instance
try {
instance_info->devices.emplace(
*device,
@ -189,25 +189,25 @@ namespace {
)
);
} catch (const std::exception& e) {
std::cerr << "lsfg-vk: something went wrong during lsfg-vk initialization:\n";
std::cerr << "lsfg-vk: Something went wrong during lsfg-vk initialization:\n";
std::cerr << "- " << e.what() << '\n';
}
return VK_SUCCESS;
}
// destroy device
/// Destroy device
void myvkDestroyDevice(VkDevice device, const VkAllocationCallbacks* alloc) {
// destroy layer instance
// Destroy layer instance
auto it = instance_info->devices.find(device);
if (it != instance_info->devices.end())
instance_info->devices.erase(it);
// destroy device
// Destroy device
auto vkDestroyDevice = reinterpret_cast<PFN_vkDestroyDevice>(
instance_info->funcs.GetDeviceProcAddr(device, "vkDestroyDevice"));
if (!vkDestroyDevice) {
std::cerr << "lsfg-vk: failed to get next layer's vkDestroyDevice, "
std::cerr << "lsfg-vk: Failed to get next layer's vkDestroyDevice, "
"the previous layer does not follow spec\n";
return;
}
@ -215,24 +215,24 @@ namespace {
vkDestroyDevice(device, alloc);
}
// destroy instance
/// Destroy instance
void myvkDestroyInstance(VkInstance instance, const VkAllocationCallbacks* alloc) {
// remove instance handle
// Remove instance handle
auto it = std::ranges::find(instance_info->handles, instance);
if (it != instance_info->handles.end())
instance_info->handles.erase(it);
// destroy instance info if no handles remain
// Destroy instance info if no handles remain
if (instance_info->handles.empty()) {
delete instance_info; // NOLINT (memory management)
instance_info = nullptr;
}
// destroy instance
// Destroy instance
auto vkDestroyInstance = reinterpret_cast<PFN_vkDestroyInstance>(
layer_info->GetInstanceProcAddr(instance, "vkDestroyInstance"));
if (!vkDestroyInstance) {
std::cerr << "lsfg-vk: failed to get next layer's vkDestroyInstance, "
std::cerr << "lsfg-vk: Failed to get next layer's vkDestroyInstance, "
"the previous layer does not follow spec\n";
return;
}
@ -240,7 +240,7 @@ namespace {
vkDestroyInstance(instance, alloc);
}
// get optional function pointer override
/// Get optional function pointer override
PFN_vkVoidFunction getProcAddr(const std::string& name) {
auto it = layer_info->map.find(name);
if (it != layer_info->map.end())
@ -248,7 +248,7 @@ namespace {
return nullptr;
}
// get instance-level function pointers
/// Get instance-level function pointers
PFN_vkVoidFunction myvkGetInstanceProcAddr(VkInstance instance, const char* name) {
if (!name) return nullptr;
@ -259,7 +259,7 @@ namespace {
return layer_info->GetInstanceProcAddr(instance, name);
}
// get device-level function pointers
/// Get device-level function pointers
PFN_vkVoidFunction myvkGetDeviceProcAddr(VkDevice device, const char* name) {
if (!name) return nullptr;
@ -282,7 +282,7 @@ namespace {
return VK_ERROR_INITIALIZATION_FAILED;
try {
// retire old swapchain
// Retire old swapchain
if (info->oldSwapchain) {
const auto& info_mapping = instance_info->swapchainInfos.find(info->oldSwapchain);
if (info_mapping != instance_info->swapchainInfos.end())
@ -295,9 +295,9 @@ namespace {
layer_info->root.removeSwapchainContext(info->oldSwapchain);
}
layer_info->root.update(); // ensure config is up to date
layer_info->root.update(); // Ensure config is up to date
// create swapchain
// Create swapchain
VkSwapchainCreateInfoKHR newInfo = *info;
layer_info->root.modifySwapchainCreateInfo(it->second, newInfo,
[=, newInfo = &newInfo]() {
@ -308,7 +308,7 @@ namespace {
}
);
// get all swapchain images
// Get all swapchain images
uint32_t imageCount{};
auto res = it->second.df().GetSwapchainImagesKHR(device, *swapchain,
&imageCount, VK_NULL_HANDLE);
@ -329,7 +329,7 @@ namespace {
.presentMode = newInfo.presentMode
}).first->second;
// create lsfg-vk swapchain
// Create lsfg-vk swapchain
layer_info->root.createSwapchainContext(it->second, *swapchain, info);
instance_info->swapchains.emplace(*swapchain,
@ -337,11 +337,11 @@ namespace {
return res;
} catch (const ls::vulkan_error& e) {
std::cerr << "lsfg-vk: something went wrong during lsfg-vk swapchain creation:\n";
std::cerr << "lsfg-vk: Something went wrong during lsfg-vk swapchain creation:\n";
std::cerr << "- " << e.what() << '\n';
return e.error();
} catch (const std::exception& e) {
std::cerr << "lsfg-vk: something went wrong during lsfg-vk swapchain creation:\n";
std::cerr << "lsfg-vk: Something went wrong during lsfg-vk swapchain creation:\n";
std::cerr << "- " << e.what() << '\n';
return VK_ERROR_INITIALIZATION_FAILED;
}
@ -349,16 +349,15 @@ namespace {
VkResult myvkQueuePresentKHR(VkQueue queue, const VkPresentInfoKHR* info) {
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunknown-warning-option"
#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
VkResult result = VK_SUCCESS;
// ensure layer config is up to date
// Ensure layer config is up to date
bool reload{};
try {
reload = layer_info->root.update();
} catch (const std::exception&) {
reload = false; // ignore parse errors
reload = false; // Ignore parse errors
}
if (reload) {
@ -377,7 +376,7 @@ namespace {
}
}
// present each swapchain
// Present each swapchain
for (size_t i = 0; i < info->swapchainCount; i++) {
const auto& swapchain = info->pSwapchains[i];
@ -403,7 +402,7 @@ namespace {
if (e.error() != VK_ERROR_OUT_OF_DATE_KHR) {
std::cerr << "lsfg-vk: something went wrong during lsfg-vk swapchain presentation:\n";
std::cerr << "- " << e.what() << '\n';
} // silently swallow out-of-date errors
} // Silently swallow out-of-date errors
result = e.error();
} catch (const std::exception& e) {
@ -438,7 +437,7 @@ namespace {
layer_info->root.removeSwapchainContext(swapchain);
// destroy swapchain
// Destroy swapchain
it->second.df().DestroySwapchainKHR(device, swapchain, alloc);
}
}
@ -446,13 +445,13 @@ namespace {
/// Vulkan layer entrypoint
__attribute__((visibility("default")))
VkResult vkNegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface* pVersionStruct) {
// ensure loader compatibility
// Ensure loader compatibility
if (!pVersionStruct
|| pVersionStruct->sType != LAYER_NEGOTIATE_INTERFACE_STRUCT
|| pVersionStruct->loaderLayerInterfaceVersion < 2)
return VK_ERROR_INITIALIZATION_FAILED;
// if the layer has already been initialized, skip
// If the layer has already been initialized, skip
if (layer_info) {
pVersionStruct->loaderLayerInterfaceVersion = 2;
pVersionStruct->pfnGetPhysicalDeviceProcAddr = nullptr;
@ -461,7 +460,7 @@ VkResult vkNegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface* pVers
return VK_SUCCESS;
}
// load the layer configuration
// Load the layer configuration
try {
layer_info = new LayerInfo { // NOLINT (memory management)
.map = {
@ -478,20 +477,20 @@ VkResult vkNegotiateLoaderLayerInterfaceVersion(VkNegotiateLayerInterface* pVers
.root = Root()
};
if (!layer_info->root.active()) { // skip inactive
if (!layer_info->root.active()) { // Skip inactive
delete layer_info; // NOLINT (memory management)
layer_info = nullptr;
return VK_ERROR_INITIALIZATION_FAILED;
}
} catch (const std::exception& e) {
std::cerr << "lsfg-vk: something went wrong during lsfg-vk layer initialization:\n";
std::cerr << "lsfg-vk: Something went wrong during lsfg-vk layer initialization:\n";
std::cerr << "- " << e.what() << '\n';
return VK_ERROR_INITIALIZATION_FAILED;
}
// emplace function pointers/version
// Emplace function pointers/version
pVersionStruct->loaderLayerInterfaceVersion = 2;
pVersionStruct->pfnGetPhysicalDeviceProcAddr = nullptr;
pVersionStruct->pfnGetDeviceProcAddr = myvkGetDeviceProcAddr;

View file

@ -2,10 +2,10 @@
#include "instance.hpp"
#include "lsfg-vk-common/helpers/paths.hpp"
#include "swapchain.hpp"
#include "lsfg-vk-common/configuration/detection.hpp"
#include "lsfg-vk-common/helpers/errors.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include "swapchain.hpp"
#include <algorithm>
#include <cstdint>
@ -25,7 +25,7 @@ using namespace lsfgvk;
using namespace lsfgvk::layer;
namespace {
/// helper function to add required extensions
/// Helper function to add required extensions
std::vector<const char*> add_extensions(const char* const* existingExtensions, size_t count,
const std::vector<const char*>& requiredExtensions) {
std::vector<const char*> extensions(count);
@ -45,14 +45,14 @@ namespace {
}
Root::Root() {
// find active profile
// Find active profile
const auto& profile = findProfile(this->config.get(), ls::identify());
if (!profile.has_value())
return;
this->active_profile = profile->second;
std::cerr << "lsfg-vk: using profile with name '" << this->active_profile->name << "' ";
std::cerr << "lsfg-vk: Using profile with name '" << this->active_profile->name << "' ";
switch (profile->first) {
case ls::IdentType::OVERRIDE:
std::cerr << "(identified via override)\n";
@ -167,10 +167,10 @@ void Root::modifySwapchainCreateInfo(const vk::Vulkan& vk, VkSwapchainCreateInfo
void Root::createSwapchainContext(const vk::Vulkan& vk,
VkSwapchainKHR swapchain, const SwapchainInfo& info) {
if (!this->active_profile.has_value())
throw ls::error("attempted to create swapchain context while layer is inactive");
throw ls::error("Attempted to create swapchain context while layer is inactive");
const auto& profile = *this->active_profile;
if (!this->backend.has_value()) { // emplace backend late, due to loader bug
if (!this->backend.has_value()) { // Emplace backend late, due to loader bug
const auto& global = this->config.get().global();
setenv("DISABLE_LSFGVK", "1", 1);
@ -183,23 +183,12 @@ void Root::createSwapchainContext(const vk::Vulkan& vk,
dll = ls::findShaderDll();
this->backend.emplace(
[gpu = profile.gpu](
const std::string& deviceName,
std::pair<const std::string&, const std::string&> ids,
const std::optional<std::string>& pci
) {
if (!gpu)
return true;
return (deviceName == *gpu)
|| (ids.first + ":" + ids.second == *gpu)
|| (pci && *pci == *gpu);
},
profile.gpu.value_or(""),
dll, global.allow_fp16
);
} catch (const std::exception& e) {
unsetenv("DISABLE_LSFGVK");
throw ls::error("failed to create backend instance", e);
throw ls::error("Failed to create backend instance", e);
}
unsetenv("DISABLE_LSFGVK");

View file

@ -2,13 +2,14 @@
#pragma once
#include "lsfg-vk-backend/lsfgvk.hpp"
#include "lsfg-vk/lsfgvk.hpp"
#include "lsfg-vk-common/configuration/config.hpp"
#include "lsfg-vk-common/helpers/errors.hpp"
#include "lsfg-vk-common/helpers/pointers.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include "swapchain.hpp"
#include <functional>
#include <optional>
#include <unordered_map>
@ -16,49 +17,75 @@
namespace lsfgvk::layer {
/// root context of the lsfg-vk layer
///
/// Root context of the lsfg-vk layer
///
class Root {
public:
/// create the lsfg-vk root context
///
/// Create the lsfg-vk root context
///
/// @throws ls::error on failure
///
Root();
/// check if the layer is active
/// @return true if active
///
/// Check if the layer is active
///
    /// @return true if active
///
[[nodiscard]] bool active() const { return this->active_profile.has_value(); }
/// ensure the layer is up-to-date
/// @return true if the configuration was updated
///
/// Ensure the layer is up-to-date
///
    /// @return true if the configuration was updated
///
bool update();
/// modify instance create info
/// @param createInfo original create info
/// @param finish function to call after modification
///
/// Modify instance create info
///
/// @param createInfo Original create info
/// @param finish Function to call after modification
///
void modifyInstanceCreateInfo(VkInstanceCreateInfo& createInfo,
const std::function<void(void)>& finish) const;
/// modify device create info
/// @param createInfo original create info
/// @param finish function to call after modification
///
/// Modify device create info
///
/// @param createInfo Original create info
/// @param finish Function to call after modification
///
void modifyDeviceCreateInfo(VkDeviceCreateInfo& createInfo,
const std::function<void(void)>& finish) const;
/// modify swapchain create info
/// @param vk vulkan instance
/// @param createInfo original create info
/// @param finish function to call after modification
///
/// Modify swapchain create info
///
/// @param vk Vulkan instance
/// @param createInfo Original create info
/// @param finish Function to call after modification
///
void modifySwapchainCreateInfo(const vk::Vulkan& vk, VkSwapchainCreateInfoKHR& createInfo,
const std::function<void(void)>& finish) const;
/// create swapchain context
/// @param vk vulkan instance
/// @param swapchain swapchain handle
/// @param info swapchain info
///
/// Create swapchain context
///
/// @param vk Vulkan instance
/// @param swapchain Swapchain handle
/// @param info Swapchain info
/// @throws ls::error on failure
///
void createSwapchainContext(const vk::Vulkan& vk, VkSwapchainKHR swapchain,
const SwapchainInfo& info);
/// get swapchain context
/// @param swapchain swapchain handle
/// @return swapchain context
///
/// Get swapchain context
///
/// @param swapchain Swapchain handle
        /// @return Swapchain context
/// @throws ls::error if not found
///
[[nodiscard]] Swapchain& getSwapchainContext(VkSwapchainKHR swapchain) {
const auto& it = this->swapchains.find(swapchain);
if (it == this->swapchains.end())
@ -66,14 +93,17 @@ namespace lsfgvk::layer {
return it->second;
}
/// remove swapchain context
/// @param swapchain swapchain handle
///
/// Remove swapchain context
///
/// @param swapchain Swapchain handle
///
void removeSwapchainContext(VkSwapchainKHR swapchain);
private:
ls::WatchedConfig config;
std::optional<ls::GameConf> active_profile;
ls::lazy<backend::Instance> backend;
ls::lazy<lsfgvk::Instance> backend;
std::unordered_map<VkSwapchainKHR, Swapchain> swapchains;
};

View file

@ -1,7 +1,7 @@
/* SPDX-License-Identifier: GPL-3.0-or-later */
#include "swapchain.hpp"
#include "lsfg-vk-backend/lsfgvk.hpp"
#include "lsfg-vk/lsfgvk.hpp"
#include "lsfg-vk-common/configuration/config.hpp"
#include "lsfg-vk-common/helpers/errors.hpp"
#include "lsfg-vk-common/helpers/pointers.hpp"
@ -10,11 +10,10 @@
#include "lsfg-vk-common/vulkan/semaphore.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <exception>
#include <functional>
#include <memory>
#include <optional>
#include <utility>
#include <vector>
@ -25,6 +24,7 @@ using namespace lsfgvk;
using namespace lsfgvk::layer;
namespace {
/// Barrier helper
VkImageMemoryBarrier barrierHelper(VkImage handle,
VkAccessFlags srcAccessMask,
VkAccessFlags dstAccessMask,
@ -66,66 +66,45 @@ void layer::context_ModifySwapchainCreateInfo(const ls::GameConf& profile, uint3
}
}
Swapchain::Swapchain(const vk::Vulkan& vk, backend::Instance& backend,
Swapchain::Swapchain(const vk::Vulkan& vk, lsfgvk::Instance& backend,
ls::GameConf profile, SwapchainInfo info) :
instance(backend),
profile(std::move(profile)), info(std::move(info)) {
const VkExtent2D extent = this->info.extent;
const bool hdr = this->info.format > 57;
std::vector<int> sourceFds(2);
std::vector<int> destinationFds(this->profile.multiplier - 1);
this->sourceImages.reserve(sourceFds.size());
for (int& fd : sourceFds)
this->sourceImages.emplace_back(vk,
extent, hdr ? VK_FORMAT_R16G16B16A16_SFLOAT : VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
std::nullopt, &fd);
this->destinationImages.reserve(destinationFds.size());
for (int& fd : destinationFds)
this->destinationImages.emplace_back(vk,
extent, hdr ? VK_FORMAT_R16G16B16A16_SFLOAT : VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
std::nullopt, &fd);
int syncFd{};
this->syncSemaphore.emplace(vk, 0, std::nullopt, &syncFd);
try {
this->ctx = ls::owned_ptr<ls::R<backend::Context>>(
new ls::R<backend::Context>(backend.openContext(
{ sourceFds.at(0), sourceFds.at(1) }, destinationFds, syncFd,
extent.width, extent.height,
hdr, 1.0F / this->profile.flow_scale, this->profile.performance_mode
)),
[backend = &backend](ls::R<backend::Context>& ctx) {
backend->closeContext(ctx);
}
this->ctx = std::make_unique<lsfgvk::Context>(
backend,
extent.width, extent.height,
this->profile.flow_scale,
this->profile.performance_mode
);
backend::makeLeaking(); // don't worry about it :3
this->total = static_cast<uint32_t>(this->profile.multiplier) - 1;
} catch (const std::exception& e) {
throw ls::error("failed to create swapchain context", e);
throw ls::error("Failed to create swapchain context", e);
}
const auto exportedFds = this->ctx->exportFds();
this->sourceImage.emplace(vk,
extent, VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
exportedFds.sourceFd, std::nullopt, 2);
this->destinationImage.emplace(vk,
extent, VK_FORMAT_R8G8B8A8_UNORM,
VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_SAMPLED_BIT,
exportedFds.destinationFd);
this->syncSemaphore.emplace(vk, 0, exportedFds.syncFd);
this->renderCommandBuffer.emplace(vk);
this->renderFence.emplace(vk);
for (size_t i = 0; i < this->destinationImages.size(); i++) {
this->finalSemaphore.emplace(vk);
for (size_t i = 0; i < this->total; i++) {
this->passes.emplace_back(RenderPass {
.commandBuffer = vk::CommandBuffer(vk),
.acquireSemaphore = vk::Semaphore(vk)
.acquireSemaphore = vk::Semaphore(vk),
.copySemaphore = vk::Semaphore(vk)
});
}
const size_t frames = std::max(this->info.images.size(), this->destinationImages.size() + 2);
for (size_t i = 0; i < frames; i++) {
this->postCopySemaphores.emplace_back(
vk::Semaphore(vk),
vk::Semaphore(vk)
);
}
}
VkResult Swapchain::present(const vk::Vulkan& vk,
@ -133,19 +112,18 @@ VkResult Swapchain::present(const vk::Vulkan& vk,
void* next_chain, uint32_t imageIdx,
const std::vector<VkSemaphore>& semaphores) {
const auto& swapchainImage = this->info.images.at(imageIdx);
const auto& sourceImage = this->sourceImages.at(this->fidx % 2);
const auto sourceImageIdx{static_cast<uint32_t>(this->iteration) % 2};
// schedule frame generation
// Schedule frame generation
try {
this->instance.get().scheduleFrames(this->ctx.get());
this->ctx->dispatch(this->total);
} catch (const std::exception& e) {
throw ls::error("failed to schedule frames", e);
throw ls::error("Failed to schedule frames", e);
}
// update present mode when not using pacing
// Update present mode when not using pacing
if (this->profile.pacing == ls::Pacing::None) {
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wunknown-warning-option"
#pragma clang diagnostic ignored "-Wunsafe-buffer-usage"
auto* info = reinterpret_cast<VkSwapchainPresentModeInfoEXT*>(next_chain);
while (info) {
@ -160,12 +138,12 @@ VkResult Swapchain::present(const vk::Vulkan& vk,
#pragma clang diagnostic pop
}
// wait for completion of previous frame
if (this->fidx && !this->renderFence->wait(vk, 150ULL * 1000 * 1000))
// Wait for completion of previous frame
if (this->iteration && !this->renderFence->wait(vk, 150ULL * 1000 * 1000))
throw ls::vulkan_error(VK_TIMEOUT, "vkWaitForFences() failed");
this->renderFence->reset(vk);
// copy swapchain image into backend source image
// Copy swapchain image into backend source image
const auto& cmdbuf = *this->renderCommandBuffer;
cmdbuf.begin(vk);
@ -177,15 +155,15 @@ VkResult Swapchain::present(const vk::Vulkan& vk,
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
),
barrierHelper(sourceImage.handle(),
barrierHelper(this->sourceImage->handle(),
VK_ACCESS_NONE,
VK_ACCESS_TRANSFER_WRITE_BIT,
VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
),
},
{ swapchainImage, sourceImage.handle() },
sourceImage.getExtent(),
{ swapchainImage, this->sourceImage->handle() },
this->sourceImage->getExtent(),
{
barrierHelper(swapchainImage,
VK_ACCESS_TRANSFER_READ_BIT,
@ -193,39 +171,40 @@ VkResult Swapchain::present(const vk::Vulkan& vk,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR
),
}
},
0, sourceImageIdx
);
cmdbuf.end(vk);
cmdbuf.submit(vk,
semaphores, VK_NULL_HANDLE, 0,
{}, this->syncSemaphore->handle(), this->idx++
{}, this->syncSemaphore->handle(), this->syncValue
);
for (size_t i = 0; i < this->destinationImages.size(); i++) {
auto& pcs = this->postCopySemaphores.at(this->idx % this->postCopySemaphores.size());
auto& destinationImage = this->destinationImages.at(i);
auto& pass = this->passes.at(i);
for (size_t i = 0; i < this->passes.size(); i++) {
auto& pass{this->passes.at(i)};
const bool last{i == (this->passes.size() - 1)};
// acquire swapchain image
uint32_t aqImageIdx{};
// Acquire swapchain image
uint32_t swapchainImageIdx{};
auto res = vk.df().AcquireNextImageKHR(vk.dev(), swapchain,
UINT64_MAX, pass.acquireSemaphore.handle(),
VK_NULL_HANDLE,
&aqImageIdx
&swapchainImageIdx
);
if (res != VK_SUCCESS && res != VK_SUBOPTIMAL_KHR)
throw ls::vulkan_error(res, "vkAcquireNextImageKHR() failed");
const auto& aquiredSwapchainImage = this->info.images.at(aqImageIdx);
const auto& aquiredSwapchainImage = this->info.images.at(swapchainImageIdx);
// copy backend destination image into swapchain image
// Copy backend destination image into swapchain image
auto& cmdbuf = pass.commandBuffer;
cmdbuf.begin(vk);
cmdbuf.blitImage(vk,
{
barrierHelper(destinationImage.handle(),
barrierHelper(this->destinationImage->handle(),
VK_ACCESS_NONE,
VK_ACCESS_TRANSFER_READ_BIT,
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
@ -238,8 +217,8 @@ VkResult Swapchain::present(const vk::Vulkan& vk,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL
),
},
{ destinationImage.handle(), aquiredSwapchainImage },
destinationImage.getExtent(),
{ this->destinationImage->handle(), aquiredSwapchainImage },
this->destinationImage->getExtent(),
{
barrierHelper(aquiredSwapchainImage,
VK_ACCESS_TRANSFER_WRITE_BIT,
@ -250,48 +229,43 @@ VkResult Swapchain::present(const vk::Vulkan& vk,
}
);
std::vector<VkSemaphore> waitSemaphores{ pass.acquireSemaphore.handle() };
if (i) { // non-first pass
const auto& prevPCS = this->postCopySemaphores.at((this->idx - 1) % this->postCopySemaphores.size());
waitSemaphores.push_back(prevPCS.second.handle());
}
const std::vector<VkSemaphore> signalSemaphores{
pcs.first.handle(),
pcs.second.handle()
};
cmdbuf.end(vk);
std::vector<VkSemaphore> signalSemaphores{ pass.copySemaphore.handle() };
if (last)
signalSemaphores.push_back(this->finalSemaphore->handle());
this->syncValue++;
cmdbuf.submit(vk,
waitSemaphores, this->syncSemaphore->handle(), this->idx,
signalSemaphores, VK_NULL_HANDLE, 0,
i == this->destinationImages.size() - 1 ? this->renderFence->handle() : VK_NULL_HANDLE
{ pass.acquireSemaphore.handle() }, this->syncSemaphore->handle(), this->syncValue,
signalSemaphores, last ? nullptr : this->syncSemaphore->handle(), this->syncValue + 1,
last ? this->renderFence->handle() : VK_NULL_HANDLE
);
// present swapchain image
this->syncValue++;
// Present swapchain image
const VkPresentInfoKHR presentInfo{
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.pNext = i ? nullptr : next_chain,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &pcs.first.handle(),
.pWaitSemaphores = &pass.copySemaphore.handle(),
.swapchainCount = 1,
.pSwapchains = &swapchain,
.pImageIndices = &aqImageIdx,
.pImageIndices = &swapchainImageIdx,
};
res = vk.df().QueuePresentKHR(queue,
&presentInfo);
if (res != VK_SUCCESS && res != VK_SUBOPTIMAL_KHR)
throw ls::vulkan_error(res, "vkQueuePresentKHR() failed");
this->idx++;
}
// present original swapchain image
auto& lastPCS = this->postCopySemaphores.at((this->idx - 1) % this->postCopySemaphores.size());
// Present original swapchain image
const VkPresentInfoKHR presentInfo{
.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR,
.waitSemaphoreCount = 1,
.pWaitSemaphores = &lastPCS.second.handle(),
.pWaitSemaphores = &this->finalSemaphore->handle(),
.swapchainCount = 1,
.pSwapchains = &swapchain,
.pImageIndices = &imageIdx,
@ -300,6 +274,7 @@ VkResult Swapchain::present(const vk::Vulkan& vk,
if (res != VK_SUCCESS && res != VK_SUBOPTIMAL_KHR)
throw ls::vulkan_error(res, "vkQueuePresentKHR() failed");
this->fidx++;
this->iteration++;
return res;
}

View file

@ -2,7 +2,7 @@
#pragma once
#include "lsfg-vk-backend/lsfgvk.hpp"
#include "lsfg-vk/lsfgvk.hpp"
#include "lsfg-vk-common/configuration/config.hpp"
#include "lsfg-vk-common/helpers/pointers.hpp"
#include "lsfg-vk-common/vulkan/command_buffer.hpp"
@ -12,15 +12,18 @@
#include "lsfg-vk-common/vulkan/timeline_semaphore.hpp"
#include "lsfg-vk-common/vulkan/vulkan.hpp"
#include <cstddef>
#include <cstdint>
#include <utility>
#include <memory>
#include <vector>
#include <vulkan/vulkan_core.h>
namespace lsfgvk::layer {
/// swapchain info struct
///
/// Swapchain info struct
///
struct SwapchainInfo {
std::vector<VkImage> images;
VkFormat format;
@ -29,53 +32,67 @@ namespace lsfgvk::layer {
VkPresentModeKHR presentMode;
};
/// modify the swapchain create info based on the profile pre-swapchain creation
/// @param profile active game profile
/// @param maxImages maximum number of images supported by the surface
/// @param createInfo swapchain create info to modify
///
/// Modify the swapchain create info based on the profile pre-swapchain creation
///
/// @param profile Active game profile
/// @param maxImages Maximum number of images supported by the surface
/// @param createInfo Swapchain create info to modify
///
void context_ModifySwapchainCreateInfo(const ls::GameConf& profile, uint32_t maxImages,
VkSwapchainCreateInfoKHR& createInfo);
/// swapchain context for a layer instance
///
/// Swapchain context for a layer instance
///
class Swapchain {
public:
/// create a new swapchain context
/// @param vk vulkan instance
///
/// Create a new swapchain context
///
/// @param vk Vulkan instance
/// @param backend lsfg-vk backend instance
/// @param profile active game profile
/// @param info swapchain info
Swapchain(const vk::Vulkan& vk, backend::Instance& backend,
/// @param profile Active game profile
/// @param info Swapchain info
///
Swapchain(const vk::Vulkan& vk, lsfgvk::Instance& backend,
ls::GameConf profile, SwapchainInfo info);
/// present a frame
/// @param vk vulkan instance
/// @param queue presentation queue
/// @param next_chain next chain pointer for the present info (WARN: shared!)
/// @param imageIdx swapchain image index to present to
/// @param semaphores semaphores to wait on before presenting
/// @throws ls::vulkan_error on vulkan errors
///
/// Present a frame
///
/// @param vk Vulkan instance
/// @param queue Presentation queue
/// @param next_chain next chain pointer for the present info (WARNING: shared!)
/// @param imageIdx Swapchain image index to present to
/// @param semaphores Semaphores to wait on before presenting
/// @throws ls::vulkan_error on vulkan error
///
VkResult present(const vk::Vulkan& vk,
VkQueue queue, VkSwapchainKHR swapchain,
void* next_chain, uint32_t imageIdx,
const std::vector<VkSemaphore>& semaphores);
private:
std::vector<vk::Image> sourceImages;
std::vector<vk::Image> destinationImages;
ls::lazy<vk::Image> sourceImage;
ls::lazy<vk::Image> destinationImage;
ls::lazy<vk::TimelineSemaphore> syncSemaphore;
ls::lazy<vk::CommandBuffer> renderCommandBuffer;
ls::lazy<vk::Fence> renderFence;
ls::lazy<vk::Semaphore> finalSemaphore;
struct RenderPass {
vk::CommandBuffer commandBuffer;
vk::Semaphore acquireSemaphore;
vk::Semaphore copySemaphore;
};
std::vector<RenderPass> passes;
std::vector<std::pair<vk::Semaphore, vk::Semaphore>> postCopySemaphores;
ls::R<backend::Instance> instance;
ls::owned_ptr<ls::R<backend::Context>> ctx;
size_t idx{1};
size_t fidx{0}; // real frame index
ls::R<lsfgvk::Instance> instance;
std::unique_ptr<lsfgvk::Context> ctx;
uint32_t total{};
size_t iteration{0};
size_t syncValue{1};
ls::GameConf profile;
SwapchainInfo info;

View file

@ -25,3 +25,5 @@ Checks:
- -portability-avoid-pragma-once
# Qt requires use of raw pointers in many places
- -cppcoreguidelines-owning-memory
# Qt seems to break some ranges algorithms in GCC
- -modernize-use-ranges

View file

@ -28,15 +28,13 @@ set_target_properties(lsfg-vk-ui PROPERTIES
AUTOUIC ON)
target_compile_options(lsfg-vk-ui PRIVATE # QT-codegen warnings
-Wno-unknown-warning-option
-Wno-ctad-maybe-unsupported
-Wno-unsafe-buffer-usage-in-libc-call
-Wno-global-constructors
-Wno-unsafe-buffer-usage)
-Wno-unsafe-buffer-usage
-Wno-global-constructors)
target_link_libraries(lsfg-vk-ui
PRIVATE lsfg-vk-common
PRIVATE lsfg-vk-backend
PRIVATE Qt6::Quick)
install(TARGETS lsfg-vk-ui

View file

@ -145,12 +145,12 @@ ApplicationWindow {
GroupEntry {
title: "Path to Lossless Scaling"
description: "Change the location of Lossless.dll"
description: "Change the location of lsfg-vk.dll"
FileEdit {
Layout.fillWidth: true
title: "Select Lossless.dll"
title: "Select lsfg-vk.dll"
filter: "Dynamic Link Library Files (*.dll)"
text: backend.dll

View file

@ -19,7 +19,7 @@ using namespace lsfgvk;
using namespace lsfgvk::ui;
Backend::Backend() {
// load configuration
// Load existing configuration
ls::ConfigFile config{};
auto path = ls::findConfigurationFile();
@ -27,7 +27,8 @@ Backend::Backend() {
try {
config = ls::ConfigFile(path);
} catch (const std::exception&) {
std::cerr << "the configuration file is invalid, it has been backed up to '.old'\n";
std::cerr << "The existing configuration file is invalid, "
<< "it has been backed up to '.old'\n";
std::filesystem::rename(path, path.string() + ".old");
}
}
@ -35,17 +36,17 @@ Backend::Backend() {
this->m_global = config.global();
this->m_profiles = config.profiles();
// create gpu list
this->m_gpu_list = ui::getAvailableGPUs();
// Create gpu list
this->m_gpu_list = ui::queryGPUs();
// create profile list model
// Create profile list model
QStringList profiles;
for (const auto& profile : this->m_profiles)
profiles.append(QString::fromStdString(profile.name));
this->m_profile_list_model = new QStringListModel(profiles, this);
// create active_in list models
// Create active_in list models
this->m_active_in_list_models.reserve(this->m_profiles.size());
for (const auto& profile : this->m_profiles) {
QStringList active_in;
@ -55,11 +56,11 @@ Backend::Backend() {
this->m_active_in_list_models.push_back(new QStringListModel(active_in, this));
}
// try to select first profile
// Try to select first profile
if (!this->m_profiles.empty())
this->m_profile_index = 0;
// spawn saving thread
// Spawn saving thread
std::thread([this, path]() {
while (true) {
std::this_thread::sleep_for(std::chrono::milliseconds(500));
@ -74,10 +75,10 @@ Backend::Backend() {
try {
std::filesystem::create_directories(path.parent_path());
if (!std::filesystem::exists(path.parent_path()))
throw ls::error("unable to create configuration directory");
throw ls::error("Unable to create configuration directory");
config.write(path);
} catch (const std::exception& e) {
std::cerr << "unable to write configuration:\n- " << e.what() << "\n";
std::cerr << "Unable to write configuration:\n- " << e.what() << "\n";
}
}
}).detach();

View file

@ -9,14 +9,18 @@
#include "lsfg-vk-common/configuration/config.hpp"
#include <atomic>
#include <cstddef>
#include <optional>
#include <stdexcept>
#include <utility>
#include <vector>
#define getters public
#define setters public
namespace lsfgvk::ui {
/// Class tying ui and configuration together
/// Class tying UI and Configuration together
class Backend : public QObject {
Q_OBJECT

View file

@ -5,63 +5,100 @@
#include <QString>
#include "utils.hpp"
#include "lsfg-vk-backend/lsfgvk.hpp"
#include <algorithm>
#include <optional>
#include <stdexcept>
#include <cstddef>
#include <iomanip>
#include <ios>
#include <sstream>
#include <string>
#include <utility>
#include <unordered_map>
#include <vector>
#define VULKAN_HPP_DISPATCH_LOADER_DYNAMIC 1
#define VULKAN_HPP_NO_DEFAULT_DISPATCHER 1
#define VULKAN_HPP_NO_CONSTRUCTORS 1
#include <vulkan/vulkan.hpp>
using namespace lsfgvk;
using namespace lsfgvk::ui;
QStringList ui::getAvailableGPUs() {
// list of found GPUs and their optional PCI IDs
std::vector<std::pair<std::string, std::optional<std::string>>> gpus{};
QStringList ui::queryGPUs() {
// Create a Vulkan instance
vk::detail::DispatchLoaderDynamic dld;
dld.init();
// create a backend to query all GPUs
try {
const backend::DevicePicker picker{[&gpus](
const std::string& deviceName,
std::pair<const std::string&, const std::string&>,
const std::optional<std::string>& pci
) {
gpus.emplace_back(deviceName, pci);
return false; // always fail
}};
const vk::ApplicationInfo appInfo{
.pApplicationName = "lsfg-vk-ui",
.applicationVersion = vk::makeVersion(2, 0, 0),
.pEngineName = "lsfg-vk-ui",
.engineVersion = vk::makeVersion(2, 0, 0),
.apiVersion = vk::ApiVersion12 // Required by lsfg-vk anyways
};
const vk::InstanceCreateInfo instanceInfo{
.pApplicationInfo = &appInfo
};
const vk::UniqueInstance instance{vk::createInstanceUnique(instanceInfo, nullptr, dld)};
dld.init(*instance);
const backend::Instance instance{picker, "/non/existent/path", false};
throw std::runtime_error("???");
} catch (const backend::error&) { // NOLINT (empty catch)
// expected
// Query physical devices
std::vector<std::string> devicesByName{};
std::vector<std::string> devicesByBusId{};
for (const auto& physdev : instance->enumeratePhysicalDevices(dld)) {
// Check for VK_EXT_pci_bus_info
bool supportsPCIEXT{false};
for (const auto& ext : physdev.enumerateDeviceExtensionProperties(nullptr, dld)) {
if (std::string(ext.extensionName) != vk::EXTPciBusInfoExtensionName)
continue;
supportsPCIEXT = true;
break;
}
// Fetch properties
vk::PhysicalDevicePCIBusInfoPropertiesEXT busInfo{};
vk::PhysicalDeviceProperties2 info{
.pNext = supportsPCIEXT ? &busInfo : nullptr
};
physdev.getProperties2(&info, dld);
auto& props{info.properties};
// Append device name
props.deviceName.back() = '\0'; // Ensure null-termination
devicesByName.emplace_back(props.deviceName);
// Append PCI bus ID
if (!supportsPCIEXT)
continue;
std::ostringstream pciss;
pciss << std::hex << std::setfill('0')
<< std::setw(4) << busInfo.pciDomain << ":"
<< std::setw(2) << busInfo.pciBus << ":"
<< std::setw(2) << busInfo.pciDevice << "."
<< std::setw(1) << busInfo.pciFunction;
devicesByBusId.emplace_back(pciss.str());
}
// NOLINTBEGIN (ranges) [GCC has some issues with ranges]
// first remove 1:1 duplicates
std::sort(gpus.begin(), gpus.end());
gpus.erase(std::unique(gpus.begin(), gpus.end()), gpus.end());
// NOLINTEND
// Count duplicate names
std::unordered_map<std::string, size_t> repeats{};
for (const auto& name : devicesByName)
repeats[name]++;
// build the frontend list
// Build the frontend list
QStringList list{"Default"};
for (const auto& gpu : gpus) {
// check if GPU is in list more than once
auto count = std::count_if(gpus.begin(), gpus.end(),
[&gpu](const auto& other) {
return other.first == gpu.first;
}
);
for (size_t i = 0; i < devicesByName.size(); i++) {
const auto& name{devicesByName.at(i)};
// add pci id to distinguish, otherwise add just the name
// Decide whether to show PCI bus ID or device name
QString entry;
if (count > 1 && gpu.second.has_value())
entry = QString::fromStdString(*gpu.second);
if (repeats[name] > 1)
entry = QString::fromStdString(devicesByBusId.at(i));
else
entry = QString::fromStdString(gpu.first);
entry = QString::fromStdString(name);
// ensure no duplicates (flatpak does funny things)
// Append to list if not already present (flatpak does funny things)
if (list.contains(entry))
continue;
list.append(entry);

View file

@ -6,9 +6,11 @@
namespace lsfgvk::ui {
/// get the list of available GPUs, automatically
/// switching to PCI IDs if there are duplicates
/// @return list of available GPUs
QStringList getAvailableGPUs();
///
/// Query all GPUs available on the system.
///
/// @return List of available GPUs
///
QStringList queryGPUs();
}