mirror of
https://github.com/PancakeTAS/lsfg-vk.git
synced 2025-10-30 07:01:10 +00:00
feat(fp16): add flag for overriding fp16
This commit is contained in:
parent
73e09afcf4
commit
012b18b97c
11 changed files with 22 additions and 7 deletions
|
|
@ -21,10 +21,11 @@ namespace LSFG::Core {
|
||||||
///
|
///
|
||||||
/// @param instance Vulkan instance
|
/// @param instance Vulkan instance
|
||||||
/// @param deviceUUID The UUID of the Vulkan device to use.
|
/// @param deviceUUID The UUID of the Vulkan device to use.
|
||||||
|
/// @param forceDisableFp16 Force-disable FP16 shaders.
|
||||||
///
|
///
|
||||||
/// @throws LSFG::vulkan_error if object creation fails.
|
/// @throws LSFG::vulkan_error if object creation fails.
|
||||||
///
|
///
|
||||||
Device(const Instance& instance, uint64_t deviceUUID);
|
Device(const Instance& instance, uint64_t deviceUUID, bool forceDisableFp16);
|
||||||
|
|
||||||
/// Get the Vulkan handle.
|
/// Get the Vulkan handle.
|
||||||
[[nodiscard]] auto handle() const { return *this->device; }
|
[[nodiscard]] auto handle() const { return *this->device; }
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ namespace LSFG_3_1 {
|
||||||
/// @param isHdr Whether the images are in HDR format.
|
/// @param isHdr Whether the images are in HDR format.
|
||||||
/// @param flowScale Internal flow scale factor.
|
/// @param flowScale Internal flow scale factor.
|
||||||
/// @param generationCount Number of frames to generate.
|
/// @param generationCount Number of frames to generate.
|
||||||
|
/// @param forceDisableFp16 Whether to force-disable FP16 optimizations.
|
||||||
/// @param loader Function to load shader source code by name.
|
/// @param loader Function to load shader source code by name.
|
||||||
///
|
///
|
||||||
/// @throws LSFG::vulkan_error if Vulkan objects fail to initialize.
|
/// @throws LSFG::vulkan_error if Vulkan objects fail to initialize.
|
||||||
|
|
@ -23,6 +24,7 @@ namespace LSFG_3_1 {
|
||||||
[[gnu::visibility("default")]]
|
[[gnu::visibility("default")]]
|
||||||
void initialize(uint64_t deviceUUID,
|
void initialize(uint64_t deviceUUID,
|
||||||
bool isHdr, float flowScale, uint64_t generationCount,
|
bool isHdr, float flowScale, uint64_t generationCount,
|
||||||
|
bool forceDisableFp16,
|
||||||
const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader);
|
const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader);
|
||||||
|
|
||||||
///
|
///
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@ namespace LSFG_3_1P {
|
||||||
/// @param isHdr Whether the images are in HDR format.
|
/// @param isHdr Whether the images are in HDR format.
|
||||||
/// @param flowScale Internal flow scale factor.
|
/// @param flowScale Internal flow scale factor.
|
||||||
/// @param generationCount Number of frames to generate.
|
/// @param generationCount Number of frames to generate.
|
||||||
|
/// @param forceDisableFp16 Whether to force-disable FP16 optimizations.
|
||||||
/// @param loader Function to load shader source code by name.
|
/// @param loader Function to load shader source code by name.
|
||||||
///
|
///
|
||||||
/// @throws LSFG::vulkan_error if Vulkan objects fail to initialize.
|
/// @throws LSFG::vulkan_error if Vulkan objects fail to initialize.
|
||||||
|
|
@ -23,6 +24,7 @@ namespace LSFG_3_1P {
|
||||||
[[gnu::visibility("default")]]
|
[[gnu::visibility("default")]]
|
||||||
void initialize(uint64_t deviceUUID,
|
void initialize(uint64_t deviceUUID,
|
||||||
bool isHdr, float flowScale, uint64_t generationCount,
|
bool isHdr, float flowScale, uint64_t generationCount,
|
||||||
|
bool forceDisableFp16,
|
||||||
const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader);
|
const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader);
|
||||||
|
|
||||||
///
|
///
|
||||||
|
|
|
||||||
|
|
@ -19,7 +19,7 @@ const std::vector<const char*> requiredExtensions = {
|
||||||
"VK_EXT_robustness2"
|
"VK_EXT_robustness2"
|
||||||
};
|
};
|
||||||
|
|
||||||
Device::Device(const Instance& instance, uint64_t deviceUUID) {
|
Device::Device(const Instance& instance, uint64_t deviceUUID, bool forceDisableFp16) {
|
||||||
// get all physical devices
|
// get all physical devices
|
||||||
uint32_t deviceCount{};
|
uint32_t deviceCount{};
|
||||||
auto res = vkEnumeratePhysicalDevices(instance.handle(), &deviceCount, nullptr);
|
auto res = vkEnumeratePhysicalDevices(instance.handle(), &deviceCount, nullptr);
|
||||||
|
|
@ -72,10 +72,10 @@ Device::Device(const Instance& instance, uint64_t deviceUUID) {
|
||||||
.pNext = &supported12Features
|
.pNext = &supported12Features
|
||||||
};
|
};
|
||||||
vkGetPhysicalDeviceFeatures2(*physicalDevice, &supportedFeatures);
|
vkGetPhysicalDeviceFeatures2(*physicalDevice, &supportedFeatures);
|
||||||
this->supportsFP16 = supported12Features.shaderFloat16;
|
this->supportsFP16 = !forceDisableFp16 && supported12Features.shaderFloat16;
|
||||||
if (this->supportsFP16)
|
if (this->supportsFP16)
|
||||||
std::cerr << "lsfg-vk: Using FP16 acceleration" << '\n';
|
std::cerr << "lsfg-vk: Using FP16 acceleration" << '\n';
|
||||||
else
|
else if (!forceDisableFp16)
|
||||||
std::cerr << "lsfg-vk: FP16 acceleration not supported, using FP32" << '\n';
|
std::cerr << "lsfg-vk: FP16 acceleration not supported, using FP32" << '\n';
|
||||||
|
|
||||||
// create logical device
|
// create logical device
|
||||||
|
|
|
||||||
|
|
@ -35,13 +35,14 @@ namespace {
|
||||||
|
|
||||||
void LSFG_3_1::initialize(uint64_t deviceUUID,
|
void LSFG_3_1::initialize(uint64_t deviceUUID,
|
||||||
bool isHdr, float flowScale, uint64_t generationCount,
|
bool isHdr, float flowScale, uint64_t generationCount,
|
||||||
|
bool forceDisableFp16,
|
||||||
const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader) {
|
const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader) {
|
||||||
if (instance.has_value() || device.has_value())
|
if (instance.has_value() || device.has_value())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
instance.emplace();
|
instance.emplace();
|
||||||
device.emplace(Vulkan {
|
device.emplace(Vulkan {
|
||||||
.device{*instance, deviceUUID},
|
.device{*instance, deviceUUID, forceDisableFp16},
|
||||||
.generationCount = generationCount,
|
.generationCount = generationCount,
|
||||||
.flowScale = flowScale,
|
.flowScale = flowScale,
|
||||||
.isHdr = isHdr
|
.isHdr = isHdr
|
||||||
|
|
|
||||||
|
|
@ -35,13 +35,14 @@ namespace {
|
||||||
|
|
||||||
void LSFG_3_1P::initialize(uint64_t deviceUUID,
|
void LSFG_3_1P::initialize(uint64_t deviceUUID,
|
||||||
bool isHdr, float flowScale, uint64_t generationCount,
|
bool isHdr, float flowScale, uint64_t generationCount,
|
||||||
|
bool forceDisableFp16,
|
||||||
const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader) {
|
const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader) {
|
||||||
if (instance.has_value() || device.has_value())
|
if (instance.has_value() || device.has_value())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
instance.emplace();
|
instance.emplace();
|
||||||
device.emplace(Vulkan {
|
device.emplace(Vulkan {
|
||||||
.device{*instance, deviceUUID},
|
.device{*instance, deviceUUID, forceDisableFp16},
|
||||||
.generationCount = generationCount,
|
.generationCount = generationCount,
|
||||||
.flowScale = flowScale,
|
.flowScale = flowScale,
|
||||||
.isHdr = isHdr
|
.isHdr = isHdr
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,8 @@ namespace Config {
|
||||||
bool enable{false};
|
bool enable{false};
|
||||||
/// Path to Lossless.dll.
|
/// Path to Lossless.dll.
|
||||||
std::string dll;
|
std::string dll;
|
||||||
|
/// Whether FP16 is force-disabled
|
||||||
|
bool no_fp16{false};
|
||||||
|
|
||||||
/// The frame generation multiplier
|
/// The frame generation multiplier
|
||||||
size_t multiplier{2};
|
size_t multiplier{2};
|
||||||
|
|
|
||||||
|
|
@ -74,6 +74,7 @@ void Config::updateConfig(const std::string& file) {
|
||||||
const toml::value globalTable = toml::find_or_default<toml::table>(toml, "global");
|
const toml::value globalTable = toml::find_or_default<toml::table>(toml, "global");
|
||||||
const Configuration global{
|
const Configuration global{
|
||||||
.dll = toml::find_or(globalTable, "dll", std::string()),
|
.dll = toml::find_or(globalTable, "dll", std::string()),
|
||||||
|
.no_fp16 = toml::find_or(globalTable, "no_fp16", false),
|
||||||
.config_file = file,
|
.config_file = file,
|
||||||
.timestamp = std::filesystem::last_write_time(file)
|
.timestamp = std::filesystem::last_write_time(file)
|
||||||
};
|
};
|
||||||
|
|
@ -97,6 +98,7 @@ void Config::updateConfig(const std::string& file) {
|
||||||
Configuration game{
|
Configuration game{
|
||||||
.enable = true,
|
.enable = true,
|
||||||
.dll = global.dll,
|
.dll = global.dll,
|
||||||
|
.no_fp16 = global.no_fp16,
|
||||||
.multiplier = toml::find_or(gameTable, "multiplier", 2U),
|
.multiplier = toml::find_or(gameTable, "multiplier", 2U),
|
||||||
.flowScale = toml::find_or(gameTable, "flow_scale", 1.0F),
|
.flowScale = toml::find_or(gameTable, "flow_scale", 1.0F),
|
||||||
.performance = toml::find_or(gameTable, "performance_mode", false),
|
.performance = toml::find_or(gameTable, "performance_mode", false),
|
||||||
|
|
|
||||||
|
|
@ -53,6 +53,7 @@ LsContext::LsContext(const Hooks::DeviceInfo& info, VkSwapchainKHR swapchain,
|
||||||
// print config
|
// print config
|
||||||
std::cerr << "lsfg-vk: Reloaded configuration for " << name.second << ":\n";
|
std::cerr << "lsfg-vk: Reloaded configuration for " << name.second << ":\n";
|
||||||
if (!conf.dll.empty()) std::cerr << " Using DLL from: " << conf.dll << '\n';
|
if (!conf.dll.empty()) std::cerr << " Using DLL from: " << conf.dll << '\n';
|
||||||
|
if (conf.no_fp16) std::cerr << " FP16 Acceleration: Force-disabled\n";
|
||||||
std::cerr << " Multiplier: " << conf.multiplier << '\n';
|
std::cerr << " Multiplier: " << conf.multiplier << '\n';
|
||||||
std::cerr << " Flow Scale: " << conf.flowScale << '\n';
|
std::cerr << " Flow Scale: " << conf.flowScale << '\n';
|
||||||
std::cerr << " Performance Mode: " << (conf.performance ? "Enabled" : "Disabled") << '\n';
|
std::cerr << " Performance Mode: " << (conf.performance ? "Enabled" : "Disabled") << '\n';
|
||||||
|
|
@ -98,6 +99,7 @@ LsContext::LsContext(const Hooks::DeviceInfo& info, VkSwapchainKHR swapchain,
|
||||||
lsfgInitialize(
|
lsfgInitialize(
|
||||||
Utils::getDeviceUUID(info.physicalDevice),
|
Utils::getDeviceUUID(info.physicalDevice),
|
||||||
conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1,
|
conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1,
|
||||||
|
conf.no_fp16,
|
||||||
Extract::getShader
|
Extract::getShader
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -47,6 +47,7 @@ namespace {
|
||||||
// print config
|
// print config
|
||||||
std::cerr << "lsfg-vk: Loaded configuration for " << name.second << ":\n";
|
std::cerr << "lsfg-vk: Loaded configuration for " << name.second << ":\n";
|
||||||
if (!conf.dll.empty()) std::cerr << " Using DLL from: " << conf.dll << '\n';
|
if (!conf.dll.empty()) std::cerr << " Using DLL from: " << conf.dll << '\n';
|
||||||
|
if (conf.no_fp16) std::cerr << " FP16 Acceleration: Force-disabled\n";
|
||||||
std::cerr << " Multiplier: " << conf.multiplier << '\n';
|
std::cerr << " Multiplier: " << conf.multiplier << '\n';
|
||||||
std::cerr << " Flow Scale: " << conf.flowScale << '\n';
|
std::cerr << " Flow Scale: " << conf.flowScale << '\n';
|
||||||
std::cerr << " Performance Mode: " << (conf.performance ? "Enabled" : "Disabled") << '\n';
|
std::cerr << " Performance Mode: " << (conf.performance ? "Enabled" : "Disabled") << '\n';
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,7 @@ void Benchmark::run(uint32_t width, uint32_t height) {
|
||||||
lsfgInitialize(
|
lsfgInitialize(
|
||||||
deviceUUID, // some magic number if not given
|
deviceUUID, // some magic number if not given
|
||||||
conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1,
|
conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1,
|
||||||
|
conf.no_fp16,
|
||||||
Extract::getShader
|
Extract::getShader
|
||||||
);
|
);
|
||||||
const int32_t ctx = lsfgCreateContext(-1, -1, {},
|
const int32_t ctx = lsfgCreateContext(-1, -1, {},
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue