feat(fp16): add flag for overriding fp16

This commit is contained in:
PancakeTAS 2025-08-05 02:07:22 +02:00 committed by Pancake
parent 73e09afcf4
commit 012b18b97c
11 changed files with 22 additions and 7 deletions

View file

@ -21,10 +21,11 @@ namespace LSFG::Core {
/// ///
/// @param instance Vulkan instance /// @param instance Vulkan instance
/// @param deviceUUID The UUID of the Vulkan device to use. /// @param deviceUUID The UUID of the Vulkan device to use.
/// @param forceDisableFp16 Force-disable FP16 shaders.
/// ///
/// @throws LSFG::vulkan_error if object creation fails. /// @throws LSFG::vulkan_error if object creation fails.
/// ///
Device(const Instance& instance, uint64_t deviceUUID); Device(const Instance& instance, uint64_t deviceUUID, bool forceDisableFp16);
/// Get the Vulkan handle. /// Get the Vulkan handle.
[[nodiscard]] auto handle() const { return *this->device; } [[nodiscard]] auto handle() const { return *this->device; }

View file

@ -16,6 +16,7 @@ namespace LSFG_3_1 {
/// @param isHdr Whether the images are in HDR format. /// @param isHdr Whether the images are in HDR format.
/// @param flowScale Internal flow scale factor. /// @param flowScale Internal flow scale factor.
/// @param generationCount Number of frames to generate. /// @param generationCount Number of frames to generate.
/// @param forceDisableFp16 Whether to force-disable FP16 optimizations.
/// @param loader Function to load shader source code by name. /// @param loader Function to load shader source code by name.
/// ///
/// @throws LSFG::vulkan_error if Vulkan objects fail to initialize. /// @throws LSFG::vulkan_error if Vulkan objects fail to initialize.
@ -23,6 +24,7 @@ namespace LSFG_3_1 {
[[gnu::visibility("default")]] [[gnu::visibility("default")]]
void initialize(uint64_t deviceUUID, void initialize(uint64_t deviceUUID,
bool isHdr, float flowScale, uint64_t generationCount, bool isHdr, float flowScale, uint64_t generationCount,
bool forceDisableFp16,
const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader); const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader);
/// ///

View file

@ -16,6 +16,7 @@ namespace LSFG_3_1P {
/// @param isHdr Whether the images are in HDR format. /// @param isHdr Whether the images are in HDR format.
/// @param flowScale Internal flow scale factor. /// @param flowScale Internal flow scale factor.
/// @param generationCount Number of frames to generate. /// @param generationCount Number of frames to generate.
/// @param forceDisableFp16 Whether to force-disable FP16 optimizations.
/// @param loader Function to load shader source code by name. /// @param loader Function to load shader source code by name.
/// ///
/// @throws LSFG::vulkan_error if Vulkan objects fail to initialize. /// @throws LSFG::vulkan_error if Vulkan objects fail to initialize.
@ -23,6 +24,7 @@ namespace LSFG_3_1P {
[[gnu::visibility("default")]] [[gnu::visibility("default")]]
void initialize(uint64_t deviceUUID, void initialize(uint64_t deviceUUID,
bool isHdr, float flowScale, uint64_t generationCount, bool isHdr, float flowScale, uint64_t generationCount,
bool forceDisableFp16,
const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader); const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader);
/// ///

View file

@ -19,7 +19,7 @@ const std::vector<const char*> requiredExtensions = {
"VK_EXT_robustness2" "VK_EXT_robustness2"
}; };
Device::Device(const Instance& instance, uint64_t deviceUUID) { Device::Device(const Instance& instance, uint64_t deviceUUID, bool forceDisableFp16) {
// get all physical devices // get all physical devices
uint32_t deviceCount{}; uint32_t deviceCount{};
auto res = vkEnumeratePhysicalDevices(instance.handle(), &deviceCount, nullptr); auto res = vkEnumeratePhysicalDevices(instance.handle(), &deviceCount, nullptr);
@ -72,10 +72,10 @@ Device::Device(const Instance& instance, uint64_t deviceUUID) {
.pNext = &supported12Features .pNext = &supported12Features
}; };
vkGetPhysicalDeviceFeatures2(*physicalDevice, &supportedFeatures); vkGetPhysicalDeviceFeatures2(*physicalDevice, &supportedFeatures);
this->supportsFP16 = supported12Features.shaderFloat16; this->supportsFP16 = !forceDisableFp16 && supported12Features.shaderFloat16;
if (this->supportsFP16) if (this->supportsFP16)
std::cerr << "lsfg-vk: Using FP16 acceleration" << '\n'; std::cerr << "lsfg-vk: Using FP16 acceleration" << '\n';
else else if (!forceDisableFp16)
std::cerr << "lsfg-vk: FP16 acceleration not supported, using FP32" << '\n'; std::cerr << "lsfg-vk: FP16 acceleration not supported, using FP32" << '\n';
// create logical device // create logical device

View file

@ -35,13 +35,14 @@ namespace {
void LSFG_3_1::initialize(uint64_t deviceUUID, void LSFG_3_1::initialize(uint64_t deviceUUID,
bool isHdr, float flowScale, uint64_t generationCount, bool isHdr, float flowScale, uint64_t generationCount,
bool forceDisableFp16,
const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader) { const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader) {
if (instance.has_value() || device.has_value()) if (instance.has_value() || device.has_value())
return; return;
instance.emplace(); instance.emplace();
device.emplace(Vulkan { device.emplace(Vulkan {
.device{*instance, deviceUUID}, .device{*instance, deviceUUID, forceDisableFp16},
.generationCount = generationCount, .generationCount = generationCount,
.flowScale = flowScale, .flowScale = flowScale,
.isHdr = isHdr .isHdr = isHdr

View file

@ -35,13 +35,14 @@ namespace {
void LSFG_3_1P::initialize(uint64_t deviceUUID, void LSFG_3_1P::initialize(uint64_t deviceUUID,
bool isHdr, float flowScale, uint64_t generationCount, bool isHdr, float flowScale, uint64_t generationCount,
bool forceDisableFp16,
const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader) { const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader) {
if (instance.has_value() || device.has_value()) if (instance.has_value() || device.has_value())
return; return;
instance.emplace(); instance.emplace();
device.emplace(Vulkan { device.emplace(Vulkan {
.device{*instance, deviceUUID}, .device{*instance, deviceUUID, forceDisableFp16},
.generationCount = generationCount, .generationCount = generationCount,
.flowScale = flowScale, .flowScale = flowScale,
.isHdr = isHdr .isHdr = isHdr

View file

@ -15,6 +15,8 @@ namespace Config {
bool enable{false}; bool enable{false};
/// Path to Lossless.dll. /// Path to Lossless.dll.
std::string dll; std::string dll;
/// Whether FP16 is force-disabled
bool no_fp16{false};
/// The frame generation multiplier /// The frame generation multiplier
size_t multiplier{2}; size_t multiplier{2};

View file

@ -73,7 +73,8 @@ void Config::updateConfig(const std::string& file) {
// parse global configuration // parse global configuration
const toml::value globalTable = toml::find_or_default<toml::table>(toml, "global"); const toml::value globalTable = toml::find_or_default<toml::table>(toml, "global");
const Configuration global{ const Configuration global{
.dll = toml::find_or(globalTable, "dll", std::string()), .dll = toml::find_or(globalTable, "dll", std::string()),
.no_fp16 = toml::find_or(globalTable, "no_fp16", false),
.config_file = file, .config_file = file,
.timestamp = std::filesystem::last_write_time(file) .timestamp = std::filesystem::last_write_time(file)
}; };
@ -97,6 +98,7 @@ void Config::updateConfig(const std::string& file) {
Configuration game{ Configuration game{
.enable = true, .enable = true,
.dll = global.dll, .dll = global.dll,
.no_fp16 = global.no_fp16,
.multiplier = toml::find_or(gameTable, "multiplier", 2U), .multiplier = toml::find_or(gameTable, "multiplier", 2U),
.flowScale = toml::find_or(gameTable, "flow_scale", 1.0F), .flowScale = toml::find_or(gameTable, "flow_scale", 1.0F),
.performance = toml::find_or(gameTable, "performance_mode", false), .performance = toml::find_or(gameTable, "performance_mode", false),

View file

@ -53,6 +53,7 @@ LsContext::LsContext(const Hooks::DeviceInfo& info, VkSwapchainKHR swapchain,
// print config // print config
std::cerr << "lsfg-vk: Reloaded configuration for " << name.second << ":\n"; std::cerr << "lsfg-vk: Reloaded configuration for " << name.second << ":\n";
if (!conf.dll.empty()) std::cerr << " Using DLL from: " << conf.dll << '\n'; if (!conf.dll.empty()) std::cerr << " Using DLL from: " << conf.dll << '\n';
if (conf.no_fp16) std::cerr << " FP16 Acceleration: Force-disabled\n";
std::cerr << " Multiplier: " << conf.multiplier << '\n'; std::cerr << " Multiplier: " << conf.multiplier << '\n';
std::cerr << " Flow Scale: " << conf.flowScale << '\n'; std::cerr << " Flow Scale: " << conf.flowScale << '\n';
std::cerr << " Performance Mode: " << (conf.performance ? "Enabled" : "Disabled") << '\n'; std::cerr << " Performance Mode: " << (conf.performance ? "Enabled" : "Disabled") << '\n';
@ -98,6 +99,7 @@ LsContext::LsContext(const Hooks::DeviceInfo& info, VkSwapchainKHR swapchain,
lsfgInitialize( lsfgInitialize(
Utils::getDeviceUUID(info.physicalDevice), Utils::getDeviceUUID(info.physicalDevice),
conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1, conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1,
conf.no_fp16,
Extract::getShader Extract::getShader
); );

View file

@ -47,6 +47,7 @@ namespace {
// print config // print config
std::cerr << "lsfg-vk: Loaded configuration for " << name.second << ":\n"; std::cerr << "lsfg-vk: Loaded configuration for " << name.second << ":\n";
if (!conf.dll.empty()) std::cerr << " Using DLL from: " << conf.dll << '\n'; if (!conf.dll.empty()) std::cerr << " Using DLL from: " << conf.dll << '\n';
if (conf.no_fp16) std::cerr << " FP16 Acceleration: Force-disabled\n";
std::cerr << " Multiplier: " << conf.multiplier << '\n'; std::cerr << " Multiplier: " << conf.multiplier << '\n';
std::cerr << " Flow Scale: " << conf.flowScale << '\n'; std::cerr << " Flow Scale: " << conf.flowScale << '\n';
std::cerr << " Performance Mode: " << (conf.performance ? "Enabled" : "Disabled") << '\n'; std::cerr << " Performance Mode: " << (conf.performance ? "Enabled" : "Disabled") << '\n';

View file

@ -41,6 +41,7 @@ void Benchmark::run(uint32_t width, uint32_t height) {
lsfgInitialize( lsfgInitialize(
deviceUUID, // some magic number if not given deviceUUID, // some magic number if not given
conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1, conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1,
conf.no_fp16,
Extract::getShader Extract::getShader
); );
const int32_t ctx = lsfgCreateContext(-1, -1, {}, const int32_t ctx = lsfgCreateContext(-1, -1, {},