mirror of
				https://github.com/PancakeTAS/lsfg-vk.git
				synced 2025-10-30 07:01:10 +00:00 
			
		
		
		
	feat(fp16): add flag for overriding fp16
This commit is contained in:
		
							parent
							
								
									73e09afcf4
								
							
						
					
					
						commit
						012b18b97c
					
				
					 11 changed files with 22 additions and 7 deletions
				
			
		|  | @ -21,10 +21,11 @@ namespace LSFG::Core { | ||||||
|         ///
 |         ///
 | ||||||
|         /// @param instance Vulkan instance
 |         /// @param instance Vulkan instance
 | ||||||
|         /// @param deviceUUID The UUID of the Vulkan device to use.
 |         /// @param deviceUUID The UUID of the Vulkan device to use.
 | ||||||
|  |         /// @param forceDisableFp16 Force-disable FP16 shaders.
 | ||||||
|         ///
 |         ///
 | ||||||
|         /// @throws LSFG::vulkan_error if object creation fails.
 |         /// @throws LSFG::vulkan_error if object creation fails.
 | ||||||
|         ///
 |         ///
 | ||||||
|         Device(const Instance& instance, uint64_t deviceUUID); |         Device(const Instance& instance, uint64_t deviceUUID, bool forceDisableFp16); | ||||||
| 
 | 
 | ||||||
|         /// Get the Vulkan handle.
 |         /// Get the Vulkan handle.
 | ||||||
|         [[nodiscard]] auto handle() const { return *this->device; } |         [[nodiscard]] auto handle() const { return *this->device; } | ||||||
|  |  | ||||||
|  | @ -16,6 +16,7 @@ namespace LSFG_3_1 { | ||||||
|     /// @param isHdr Whether the images are in HDR format.
 |     /// @param isHdr Whether the images are in HDR format.
 | ||||||
|     /// @param flowScale Internal flow scale factor.
 |     /// @param flowScale Internal flow scale factor.
 | ||||||
|     /// @param generationCount Number of frames to generate.
 |     /// @param generationCount Number of frames to generate.
 | ||||||
|  |     /// @param forceDisableFp16 Whether to force-disable FP16 optimizations.
 | ||||||
|     /// @param loader Function to load shader source code by name.
 |     /// @param loader Function to load shader source code by name.
 | ||||||
|     ///
 |     ///
 | ||||||
|     /// @throws LSFG::vulkan_error if Vulkan objects fail to initialize.
 |     /// @throws LSFG::vulkan_error if Vulkan objects fail to initialize.
 | ||||||
|  | @ -23,6 +24,7 @@ namespace LSFG_3_1 { | ||||||
|     [[gnu::visibility("default")]] |     [[gnu::visibility("default")]] | ||||||
|     void initialize(uint64_t deviceUUID, |     void initialize(uint64_t deviceUUID, | ||||||
|         bool isHdr, float flowScale, uint64_t generationCount, |         bool isHdr, float flowScale, uint64_t generationCount, | ||||||
|  |         bool forceDisableFp16, | ||||||
|         const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader); |         const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader); | ||||||
| 
 | 
 | ||||||
|     ///
 |     ///
 | ||||||
|  |  | ||||||
|  | @ -16,6 +16,7 @@ namespace LSFG_3_1P { | ||||||
|     /// @param isHdr Whether the images are in HDR format.
 |     /// @param isHdr Whether the images are in HDR format.
 | ||||||
|     /// @param flowScale Internal flow scale factor.
 |     /// @param flowScale Internal flow scale factor.
 | ||||||
|     /// @param generationCount Number of frames to generate.
 |     /// @param generationCount Number of frames to generate.
 | ||||||
|  |     /// @param forceDisableFp16 Whether to force-disable FP16 optimizations.
 | ||||||
|     /// @param loader Function to load shader source code by name.
 |     /// @param loader Function to load shader source code by name.
 | ||||||
|     ///
 |     ///
 | ||||||
|     /// @throws LSFG::vulkan_error if Vulkan objects fail to initialize.
 |     /// @throws LSFG::vulkan_error if Vulkan objects fail to initialize.
 | ||||||
|  | @ -23,6 +24,7 @@ namespace LSFG_3_1P { | ||||||
|     [[gnu::visibility("default")]] |     [[gnu::visibility("default")]] | ||||||
|     void initialize(uint64_t deviceUUID, |     void initialize(uint64_t deviceUUID, | ||||||
|         bool isHdr, float flowScale, uint64_t generationCount, |         bool isHdr, float flowScale, uint64_t generationCount, | ||||||
|  |         bool forceDisableFp16, | ||||||
|         const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader); |         const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader); | ||||||
| 
 | 
 | ||||||
|     ///
 |     ///
 | ||||||
|  |  | ||||||
|  | @ -19,7 +19,7 @@ const std::vector<const char*> requiredExtensions = { | ||||||
|     "VK_EXT_robustness2" |     "VK_EXT_robustness2" | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| Device::Device(const Instance& instance, uint64_t deviceUUID) { | Device::Device(const Instance& instance, uint64_t deviceUUID, bool forceDisableFp16) { | ||||||
|     // get all physical devices
 |     // get all physical devices
 | ||||||
|     uint32_t deviceCount{}; |     uint32_t deviceCount{}; | ||||||
|     auto res = vkEnumeratePhysicalDevices(instance.handle(), &deviceCount, nullptr); |     auto res = vkEnumeratePhysicalDevices(instance.handle(), &deviceCount, nullptr); | ||||||
|  | @ -72,10 +72,10 @@ Device::Device(const Instance& instance, uint64_t deviceUUID) { | ||||||
|         .pNext = &supported12Features |         .pNext = &supported12Features | ||||||
|     }; |     }; | ||||||
|     vkGetPhysicalDeviceFeatures2(*physicalDevice, &supportedFeatures); |     vkGetPhysicalDeviceFeatures2(*physicalDevice, &supportedFeatures); | ||||||
|     this->supportsFP16 = supported12Features.shaderFloat16; |     this->supportsFP16 = !forceDisableFp16 && supported12Features.shaderFloat16; | ||||||
|     if (this->supportsFP16) |     if (this->supportsFP16) | ||||||
|         std::cerr << "lsfg-vk: Using FP16 acceleration" << '\n'; |         std::cerr << "lsfg-vk: Using FP16 acceleration" << '\n'; | ||||||
|     else |     else if (!forceDisableFp16) | ||||||
|         std::cerr << "lsfg-vk: FP16 acceleration not supported, using FP32" << '\n'; |         std::cerr << "lsfg-vk: FP16 acceleration not supported, using FP32" << '\n'; | ||||||
| 
 | 
 | ||||||
|     // create logical device
 |     // create logical device
 | ||||||
|  |  | ||||||
|  | @ -35,13 +35,14 @@ namespace { | ||||||
| 
 | 
 | ||||||
| void LSFG_3_1::initialize(uint64_t deviceUUID, | void LSFG_3_1::initialize(uint64_t deviceUUID, | ||||||
|         bool isHdr, float flowScale, uint64_t generationCount, |         bool isHdr, float flowScale, uint64_t generationCount, | ||||||
|  |         bool forceDisableFp16, | ||||||
|         const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader) { |         const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader) { | ||||||
|     if (instance.has_value() || device.has_value()) |     if (instance.has_value() || device.has_value()) | ||||||
|         return; |         return; | ||||||
| 
 | 
 | ||||||
|     instance.emplace(); |     instance.emplace(); | ||||||
|     device.emplace(Vulkan { |     device.emplace(Vulkan { | ||||||
|         .device{*instance, deviceUUID}, |         .device{*instance, deviceUUID, forceDisableFp16}, | ||||||
|         .generationCount = generationCount, |         .generationCount = generationCount, | ||||||
|         .flowScale = flowScale, |         .flowScale = flowScale, | ||||||
|         .isHdr = isHdr |         .isHdr = isHdr | ||||||
|  |  | ||||||
|  | @ -35,13 +35,14 @@ namespace { | ||||||
| 
 | 
 | ||||||
| void LSFG_3_1P::initialize(uint64_t deviceUUID, | void LSFG_3_1P::initialize(uint64_t deviceUUID, | ||||||
|         bool isHdr, float flowScale, uint64_t generationCount, |         bool isHdr, float flowScale, uint64_t generationCount, | ||||||
|  |         bool forceDisableFp16, | ||||||
|         const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader) { |         const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader) { | ||||||
|     if (instance.has_value() || device.has_value()) |     if (instance.has_value() || device.has_value()) | ||||||
|         return; |         return; | ||||||
| 
 | 
 | ||||||
|     instance.emplace(); |     instance.emplace(); | ||||||
|     device.emplace(Vulkan { |     device.emplace(Vulkan { | ||||||
|         .device{*instance, deviceUUID}, |         .device{*instance, deviceUUID, forceDisableFp16}, | ||||||
|         .generationCount = generationCount, |         .generationCount = generationCount, | ||||||
|         .flowScale = flowScale, |         .flowScale = flowScale, | ||||||
|         .isHdr = isHdr |         .isHdr = isHdr | ||||||
|  |  | ||||||
|  | @ -15,6 +15,8 @@ namespace Config { | ||||||
|         bool enable{false}; |         bool enable{false}; | ||||||
|         /// Path to Lossless.dll.
 |         /// Path to Lossless.dll.
 | ||||||
|         std::string dll; |         std::string dll; | ||||||
|  |         /// Whether FP16 is force-disabled
 | ||||||
|  |         bool no_fp16{false}; | ||||||
| 
 | 
 | ||||||
|         /// The frame generation multiplier
 |         /// The frame generation multiplier
 | ||||||
|         size_t multiplier{2}; |         size_t multiplier{2}; | ||||||
|  |  | ||||||
|  | @ -73,7 +73,8 @@ void Config::updateConfig(const std::string& file) { | ||||||
|     // parse global configuration
 |     // parse global configuration
 | ||||||
|     const toml::value globalTable = toml::find_or_default<toml::table>(toml, "global"); |     const toml::value globalTable = toml::find_or_default<toml::table>(toml, "global"); | ||||||
|     const Configuration global{ |     const Configuration global{ | ||||||
|         .dll =   toml::find_or(globalTable, "dll", std::string()), |         .dll =     toml::find_or(globalTable, "dll", std::string()), | ||||||
|  |         .no_fp16 = toml::find_or(globalTable, "no_fp16", false), | ||||||
|         .config_file = file, |         .config_file = file, | ||||||
|         .timestamp = std::filesystem::last_write_time(file) |         .timestamp = std::filesystem::last_write_time(file) | ||||||
|     }; |     }; | ||||||
|  | @ -97,6 +98,7 @@ void Config::updateConfig(const std::string& file) { | ||||||
|         Configuration game{ |         Configuration game{ | ||||||
|             .enable = true, |             .enable = true, | ||||||
|             .dll = global.dll, |             .dll = global.dll, | ||||||
|  |             .no_fp16 = global.no_fp16, | ||||||
|             .multiplier = toml::find_or(gameTable, "multiplier", 2U), |             .multiplier = toml::find_or(gameTable, "multiplier", 2U), | ||||||
|             .flowScale = toml::find_or(gameTable, "flow_scale", 1.0F), |             .flowScale = toml::find_or(gameTable, "flow_scale", 1.0F), | ||||||
|             .performance = toml::find_or(gameTable, "performance_mode", false), |             .performance = toml::find_or(gameTable, "performance_mode", false), | ||||||
|  |  | ||||||
|  | @ -53,6 +53,7 @@ LsContext::LsContext(const Hooks::DeviceInfo& info, VkSwapchainKHR swapchain, | ||||||
|         // print config
 |         // print config
 | ||||||
|         std::cerr << "lsfg-vk: Reloaded configuration for " << name.second << ":\n"; |         std::cerr << "lsfg-vk: Reloaded configuration for " << name.second << ":\n"; | ||||||
|         if (!conf.dll.empty()) std::cerr << "  Using DLL from: " << conf.dll << '\n'; |         if (!conf.dll.empty()) std::cerr << "  Using DLL from: " << conf.dll << '\n'; | ||||||
|  |         if (conf.no_fp16) std::cerr << "  FP16 Acceleration: Force-disabled\n"; | ||||||
|         std::cerr << "  Multiplier: " << conf.multiplier << '\n'; |         std::cerr << "  Multiplier: " << conf.multiplier << '\n'; | ||||||
|         std::cerr << "  Flow Scale: " << conf.flowScale << '\n'; |         std::cerr << "  Flow Scale: " << conf.flowScale << '\n'; | ||||||
|         std::cerr << "  Performance Mode: " << (conf.performance ? "Enabled" : "Disabled") << '\n'; |         std::cerr << "  Performance Mode: " << (conf.performance ? "Enabled" : "Disabled") << '\n'; | ||||||
|  | @ -98,6 +99,7 @@ LsContext::LsContext(const Hooks::DeviceInfo& info, VkSwapchainKHR swapchain, | ||||||
|     lsfgInitialize( |     lsfgInitialize( | ||||||
|         Utils::getDeviceUUID(info.physicalDevice), |         Utils::getDeviceUUID(info.physicalDevice), | ||||||
|         conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1, |         conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1, | ||||||
|  |         conf.no_fp16, | ||||||
|         Extract::getShader |         Extract::getShader | ||||||
|     ); |     ); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -47,6 +47,7 @@ namespace { | ||||||
|         // print config
 |         // print config
 | ||||||
|         std::cerr << "lsfg-vk: Loaded configuration for " << name.second << ":\n"; |         std::cerr << "lsfg-vk: Loaded configuration for " << name.second << ":\n"; | ||||||
|         if (!conf.dll.empty()) std::cerr << "  Using DLL from: " << conf.dll << '\n'; |         if (!conf.dll.empty()) std::cerr << "  Using DLL from: " << conf.dll << '\n'; | ||||||
|  |         if (conf.no_fp16) std::cerr << "  FP16 Acceleration: Force-disabled\n"; | ||||||
|         std::cerr << "  Multiplier: " << conf.multiplier << '\n'; |         std::cerr << "  Multiplier: " << conf.multiplier << '\n'; | ||||||
|         std::cerr << "  Flow Scale: " << conf.flowScale << '\n'; |         std::cerr << "  Flow Scale: " << conf.flowScale << '\n'; | ||||||
|         std::cerr << "  Performance Mode: " << (conf.performance ? "Enabled" : "Disabled") << '\n'; |         std::cerr << "  Performance Mode: " << (conf.performance ? "Enabled" : "Disabled") << '\n'; | ||||||
|  |  | ||||||
|  | @ -41,6 +41,7 @@ void Benchmark::run(uint32_t width, uint32_t height) { | ||||||
|     lsfgInitialize( |     lsfgInitialize( | ||||||
|         deviceUUID, // some magic number if not given
 |         deviceUUID, // some magic number if not given
 | ||||||
|         conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1, |         conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1, | ||||||
|  |         conf.no_fp16, | ||||||
|         Extract::getShader |         Extract::getShader | ||||||
|     ); |     ); | ||||||
|     const int32_t ctx = lsfgCreateContext(-1, -1, {}, |     const int32_t ctx = lsfgCreateContext(-1, -1, {}, | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		
		Reference in a new issue
	
	 PancakeTAS
						PancakeTAS