diff --git a/framegen/include/core/device.hpp b/framegen/include/core/device.hpp index ac8ee5a..a913a42 100644 --- a/framegen/include/core/device.hpp +++ b/framegen/include/core/device.hpp @@ -21,10 +21,11 @@ namespace LSFG::Core { /// /// @param instance Vulkan instance /// @param deviceUUID The UUID of the Vulkan device to use. + /// @param forceDisableFp16 Force-disable FP16 shaders. /// /// @throws LSFG::vulkan_error if object creation fails. /// - Device(const Instance& instance, uint64_t deviceUUID); + Device(const Instance& instance, uint64_t deviceUUID, bool forceDisableFp16); /// Get the Vulkan handle. [[nodiscard]] auto handle() const { return *this->device; } diff --git a/framegen/public/lsfg_3_1.hpp b/framegen/public/lsfg_3_1.hpp index 6486612..5db4219 100644 --- a/framegen/public/lsfg_3_1.hpp +++ b/framegen/public/lsfg_3_1.hpp @@ -16,6 +16,7 @@ namespace LSFG_3_1 { /// @param isHdr Whether the images are in HDR format. /// @param flowScale Internal flow scale factor. /// @param generationCount Number of frames to generate. + /// @param forceDisableFp16 Whether to force-disable FP16 optimizations. /// @param loader Function to load shader source code by name. /// /// @throws LSFG::vulkan_error if Vulkan objects fail to initialize. @@ -23,6 +24,7 @@ namespace LSFG_3_1 { [[gnu::visibility("default")]] void initialize(uint64_t deviceUUID, bool isHdr, float flowScale, uint64_t generationCount, + bool forceDisableFp16, const std::function(const std::string&, bool)>& loader); /// diff --git a/framegen/public/lsfg_3_1p.hpp b/framegen/public/lsfg_3_1p.hpp index 410541b..957bf09 100644 --- a/framegen/public/lsfg_3_1p.hpp +++ b/framegen/public/lsfg_3_1p.hpp @@ -16,6 +16,7 @@ namespace LSFG_3_1P { /// @param isHdr Whether the images are in HDR format. /// @param flowScale Internal flow scale factor. /// @param generationCount Number of frames to generate. + /// @param forceDisableFp16 Whether to force-disable FP16 optimizations. /// @param loader Function to load shader source code by name. /// /// @throws LSFG::vulkan_error if Vulkan objects fail to initialize. @@ -23,6 +24,7 @@ namespace LSFG_3_1P { [[gnu::visibility("default")]] void initialize(uint64_t deviceUUID, bool isHdr, float flowScale, uint64_t generationCount, + bool forceDisableFp16, const std::function(const std::string&, bool)>& loader); /// diff --git a/framegen/src/core/device.cpp b/framegen/src/core/device.cpp index f57547f..acb802b 100644 --- a/framegen/src/core/device.cpp +++ b/framegen/src/core/device.cpp @@ -19,7 +19,7 @@ const std::vector requiredExtensions = { "VK_EXT_robustness2" }; -Device::Device(const Instance& instance, uint64_t deviceUUID) { +Device::Device(const Instance& instance, uint64_t deviceUUID, bool forceDisableFp16) { // get all physical devices uint32_t deviceCount{}; auto res = vkEnumeratePhysicalDevices(instance.handle(), &deviceCount, nullptr); @@ -72,10 +72,10 @@ Device::Device(const Instance& instance, uint64_t deviceUUID) { .pNext = &supported12Features }; vkGetPhysicalDeviceFeatures2(*physicalDevice, &supportedFeatures); - this->supportsFP16 = supported12Features.shaderFloat16; + this->supportsFP16 = !forceDisableFp16 && supported12Features.shaderFloat16; if (this->supportsFP16) std::cerr << "lsfg-vk: Using FP16 acceleration" << '\n'; - else + else if (!forceDisableFp16) std::cerr << "lsfg-vk: FP16 acceleration not supported, using FP32" << '\n'; // create logical device diff --git a/framegen/v3.1_src/lsfg.cpp b/framegen/v3.1_src/lsfg.cpp index 6b5db5f..c8b5ed9 100644 --- a/framegen/v3.1_src/lsfg.cpp +++ b/framegen/v3.1_src/lsfg.cpp @@ -35,13 +35,14 @@ namespace { void LSFG_3_1::initialize(uint64_t deviceUUID, bool isHdr, float flowScale, uint64_t generationCount, + bool forceDisableFp16, const std::function(const std::string&, bool)>& loader) { if (instance.has_value() || device.has_value()) return; instance.emplace(); device.emplace(Vulkan { - .device{*instance, deviceUUID}, + .device{*instance, deviceUUID, forceDisableFp16}, .generationCount = generationCount, .flowScale = flowScale, .isHdr = isHdr diff --git a/framegen/v3.1p_src/lsfg.cpp b/framegen/v3.1p_src/lsfg.cpp index 9252e82..e123e54 100644 --- a/framegen/v3.1p_src/lsfg.cpp +++ b/framegen/v3.1p_src/lsfg.cpp @@ -35,13 +35,14 @@ namespace { void LSFG_3_1P::initialize(uint64_t deviceUUID, bool isHdr, float flowScale, uint64_t generationCount, + bool forceDisableFp16, const std::function(const std::string&, bool)>& loader) { if (instance.has_value() || device.has_value()) return; instance.emplace(); device.emplace(Vulkan { - .device{*instance, deviceUUID}, + .device{*instance, deviceUUID, forceDisableFp16}, .generationCount = generationCount, .flowScale = flowScale, .isHdr = isHdr diff --git a/include/config/config.hpp b/include/config/config.hpp index eef37da..c5acd10 100644 --- a/include/config/config.hpp +++ b/include/config/config.hpp @@ -15,6 +15,8 @@ namespace Config { bool enable{false}; /// Path to Lossless.dll. std::string dll; + /// Whether FP16 is force-disabled + bool no_fp16{false}; /// The frame generation muliplier size_t multiplier{2}; diff --git a/src/config/config.cpp b/src/config/config.cpp index ea98a81..2d4061f 100644 --- a/src/config/config.cpp +++ b/src/config/config.cpp @@ -73,7 +73,8 @@ void Config::updateConfig(const std::string& file) { // parse global configuration const toml::value globalTable = toml::find_or_default(toml, "global"); const Configuration global{ - .dll = toml::find_or(globalTable, "dll", std::string()), + .dll = toml::find_or(globalTable, "dll", std::string()), + .no_fp16 = toml::find_or(globalTable, "no_fp16", false), .config_file = file, .timestamp = std::filesystem::last_write_time(file) }; @@ -97,6 +98,7 @@ void Config::updateConfig(const std::string& file) { Configuration game{ .enable = true, .dll = global.dll, + .no_fp16 = global.no_fp16, .multiplier = toml::find_or(gameTable, "multiplier", 2U), .flowScale = toml::find_or(gameTable, "flow_scale", 1.0F), .performance = toml::find_or(gameTable, "performance_mode", false), diff --git a/src/context.cpp b/src/context.cpp index aa55da3..2eb0f00 100644 --- a/src/context.cpp +++ b/src/context.cpp @@ -53,6 +53,7 @@ LsContext::LsContext(const Hooks::DeviceInfo& info, VkSwapchainKHR swapchain, // print config std::cerr << "lsfg-vk: Reloaded configuration for " << name.second << ":\n"; if (!conf.dll.empty()) std::cerr << " Using DLL from: " << conf.dll << '\n'; + if (conf.no_fp16) std::cerr << " FP16 Acceleration: Force-disabled\n"; std::cerr << " Multiplier: " << conf.multiplier << '\n'; std::cerr << " Flow Scale: " << conf.flowScale << '\n'; std::cerr << " Performance Mode: " << (conf.performance ? "Enabled" : "Disabled") << '\n'; @@ -98,6 +99,7 @@ LsContext::LsContext(const Hooks::DeviceInfo& info, VkSwapchainKHR swapchain, lsfgInitialize( Utils::getDeviceUUID(info.physicalDevice), conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1, + conf.no_fp16, Extract::getShader ); diff --git a/src/main.cpp b/src/main.cpp index 756c6dc..b8e77de 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -47,6 +47,7 @@ namespace { // print config std::cerr << "lsfg-vk: Loaded configuration for " << name.second << ":\n"; if (!conf.dll.empty()) std::cerr << " Using DLL from: " << conf.dll << '\n'; + if (conf.no_fp16) std::cerr << " FP16 Acceleration: Force-disabled\n"; std::cerr << " Multiplier: " << conf.multiplier << '\n'; std::cerr << " Flow Scale: " << conf.flowScale << '\n'; std::cerr << " Performance Mode: " << (conf.performance ? "Enabled" : "Disabled") << '\n'; diff --git a/src/utils/benchmark.cpp b/src/utils/benchmark.cpp index 444663c..ed9ea4d 100644 --- a/src/utils/benchmark.cpp +++ b/src/utils/benchmark.cpp @@ -41,6 +41,7 @@ void Benchmark::run(uint32_t width, uint32_t height) { lsfgInitialize( deviceUUID, // some magic number if not given conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1, + conf.no_fp16, Extract::getShader ); const int32_t ctx = lsfgCreateContext(-1, -1, {},