mirror of
				https://github.com/PancakeTAS/lsfg-vk.git
				synced 2025-10-30 07:01:10 +00:00 
			
		
		
		
	Compare commits
	
		
			23 commits
		
	
	
		
			769d7cb4bd
			...
			61972f448b
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 61972f448b | ||
|   | f74b1d350f | ||
|   | 74c5ad1aeb | ||
|   | f9bb5fb765 | ||
|   | 3b06902090 | ||
|   | 99dfa3af78 | ||
|   | a70dfc9d0c | ||
|   | 65541aa960 | ||
|   | 091b0f7eca | ||
|   | f0912890c0 | ||
|   | da16437210 | ||
|   | a71b994d74 | ||
|   | f5690d741c | ||
|   | 7bac21f793 | ||
|   | 3a86e5ade4 | ||
|   | 012b18b97c | ||
|   | 73e09afcf4 | ||
|   | 28d293d531 | ||
|   | 77d1b68b8b | ||
|   | 6c3571e672 | ||
|   | cb234bde74 | ||
|   | 3fcde7c126 | ||
|   | b93a4eeaf2 | 
					 36 changed files with 325 additions and 247 deletions
				
			
		
							
								
								
									
										3
									
								
								.gitmodules
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.gitmodules
									
										
									
									
										vendored
									
									
								
							|  | @ -1,9 +1,6 @@ | |||
| [submodule "thirdparty/pe-parse"] | ||||
| 	path = thirdparty/pe-parse | ||||
| 	url = https://github.com/trailofbits/pe-parse | ||||
| [submodule "thirdparty/dxbc"] | ||||
| 	path = thirdparty/dxbc | ||||
| 	url = https://github.com/PancakeTAS/dxbc.git | ||||
| [submodule "thirdparty/toml11"] | ||||
| 	path = thirdparty/toml11 | ||||
| 	url = https://github.com/ToruNiina/toml11 | ||||
|  |  | |||
|  | @ -12,7 +12,6 @@ add_compile_options(-fPIC | |||
|     -Wno-deprecated-declarations | ||||
|     -Wno-unused-template) | ||||
| 
 | ||||
| add_subdirectory(thirdparty/dxbc EXCLUDE_FROM_ALL) | ||||
| add_subdirectory(thirdparty/pe-parse/pe-parser-library EXCLUDE_FROM_ALL) | ||||
| add_subdirectory(thirdparty/toml11 EXCLUDE_FROM_ALL) | ||||
| add_subdirectory(thirdparty/volk EXCLUDE_FROM_ALL) | ||||
|  | @ -45,7 +44,7 @@ set_target_properties(lsfg-vk PROPERTIES | |||
| target_include_directories(lsfg-vk | ||||
|     PUBLIC include) | ||||
| target_link_libraries(lsfg-vk PUBLIC | ||||
|     pe-parse dxbc toml11 | ||||
|     pe-parse toml11 | ||||
|     lsfg-vk-framegen) | ||||
| 
 | ||||
| get_target_property(TOML11_INCLUDE_DIRS toml11 INTERFACE_INCLUDE_DIRECTORIES) | ||||
|  |  | |||
|  | @ -5,6 +5,10 @@ if(NOT LSFGVK_EXCESS_DEBUG) | |||
|     set(CMAKE_CXX_VISIBILITY_PRESET "hidden") | ||||
| endif() | ||||
| 
 | ||||
| if(LSFGVK_EXCESS_DEBUG) | ||||
|     add_compile_definitions(LSFGVK_EXCESS_DEBUG) | ||||
| endif() | ||||
| 
 | ||||
| project(lsfg-vk-framegen | ||||
|     DESCRIPTION "Lossless Scaling Frame Generation Backend" | ||||
|     LANGUAGES CXX) | ||||
|  |  | |||
|  | @ -119,6 +119,10 @@ namespace LSFG::Core { | |||
|                 : descriptorSet(&descriptorSet), device(&device) {} | ||||
| 
 | ||||
|         std::vector<VkWriteDescriptorSet> entries; | ||||
|         size_t bufferIdx{0}; | ||||
|         size_t samplerIdx{16}; | ||||
|         size_t inputIdx{32}; | ||||
|         size_t outputIdx{48}; | ||||
|     }; | ||||
| 
 | ||||
| } | ||||
|  |  | |||
|  | @ -21,10 +21,11 @@ namespace LSFG::Core { | |||
|         ///
 | ||||
|         /// @param instance Vulkan instance
 | ||||
|         /// @param deviceUUID The UUID of the Vulkan device to use.
 | ||||
|         /// @param forceDisableFp16 Force-disable FP16 shaders.
 | ||||
|         ///
 | ||||
|         /// @throws LSFG::vulkan_error if object creation fails.
 | ||||
|         ///
 | ||||
|         Device(const Instance& instance, uint64_t deviceUUID); | ||||
|         Device(const Instance& instance, uint64_t deviceUUID, bool forceDisableFp16); | ||||
| 
 | ||||
|         /// Get the Vulkan handle.
 | ||||
|         [[nodiscard]] auto handle() const { return *this->device; } | ||||
|  | @ -34,6 +35,8 @@ namespace LSFG::Core { | |||
|         [[nodiscard]] uint32_t getComputeFamilyIdx() const { return this->computeFamilyIdx; } | ||||
|         /// Get the compute queue.
 | ||||
|         [[nodiscard]] VkQueue getComputeQueue() const { return this->computeQueue; } | ||||
|         /// Check if the device supports FP16.
 | ||||
|         [[nodiscard]] bool getFP16Support() const { return this->supportsFP16; } | ||||
| 
 | ||||
|         // Trivially copyable, moveable and destructible
 | ||||
|         Device(const Core::Device&) noexcept = default; | ||||
|  | @ -46,6 +49,7 @@ namespace LSFG::Core { | |||
|         VkPhysicalDevice physicalDevice{}; | ||||
| 
 | ||||
|         uint32_t computeFamilyIdx{0}; | ||||
|         bool supportsFP16{false}; | ||||
| 
 | ||||
|         VkQueue computeQueue{}; | ||||
|     }; | ||||
|  |  | |||
|  | @ -27,11 +27,14 @@ namespace LSFG::Pool { | |||
|         /// Create the shader pool.
 | ||||
|         ///
 | ||||
|         /// @param source Function to retrieve shader source code by name.
 | ||||
|         /// @param fp16 If true, use the FP16 variant of shaders.
 | ||||
|         ///
 | ||||
|         /// @throws std::runtime_error if the shader pool cannot be created.
 | ||||
|         ///
 | ||||
|         ShaderPool(const std::function<std::vector<uint8_t>(const std::string&)>& source) | ||||
|             : source(source) {} | ||||
|         ShaderPool( | ||||
|                 const std::function<std::vector<uint8_t>(const std::string&, bool)>& source, | ||||
|                 bool fp16) | ||||
|             : source(source), fp16(fp16) {} | ||||
| 
 | ||||
|         ///
 | ||||
|         /// Retrieve a shader module by name or create it.
 | ||||
|  | @ -57,7 +60,9 @@ namespace LSFG::Pool { | |||
|         Core::Pipeline getPipeline( | ||||
|             const Core::Device& device, const std::string& name); | ||||
|     private: | ||||
|         std::function<std::vector<uint8_t>(const std::string&)> source; | ||||
|         std::function<std::vector<uint8_t>(const std::string&, bool)> source; | ||||
|         bool fp16{false}; | ||||
| 
 | ||||
|         std::unordered_map<std::string, Core::ShaderModule> shaders; | ||||
|         std::unordered_map<std::string, Core::Pipeline> pipelines; | ||||
|     }; | ||||
|  |  | |||
|  | @ -16,6 +16,7 @@ namespace LSFG_3_1 { | |||
|     /// @param isHdr Whether the images are in HDR format.
 | ||||
|     /// @param flowScale Internal flow scale factor.
 | ||||
|     /// @param generationCount Number of frames to generate.
 | ||||
|     /// @param forceDisableFp16 Whether to force-disable FP16 optimizations.
 | ||||
|     /// @param loader Function to load shader source code by name.
 | ||||
|     ///
 | ||||
|     /// @throws LSFG::vulkan_error if Vulkan objects fail to initialize.
 | ||||
|  | @ -23,8 +24,10 @@ namespace LSFG_3_1 { | |||
|     [[gnu::visibility("default")]] | ||||
|     void initialize(uint64_t deviceUUID, | ||||
|         bool isHdr, float flowScale, uint64_t generationCount, | ||||
|         const std::function<std::vector<uint8_t>(const std::string&)>& loader); | ||||
|         bool forceDisableFp16, | ||||
|         const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader); | ||||
| 
 | ||||
| #ifdef LSFGVK_EXCESS_DEBUG | ||||
|     ///
 | ||||
|     /// Initialize the renderdoc API.
 | ||||
|     ///
 | ||||
|  | @ -32,6 +35,7 @@ namespace LSFG_3_1 { | |||
|     ///
 | ||||
|     [[gnu::visibility("default")]] | ||||
|     void initializeRenderDoc(); | ||||
| #endif // LSFGVK_EXCESS_DEBUG
 | ||||
| 
 | ||||
|     ///
 | ||||
|     /// Create a new LSFG context on a swapchain.
 | ||||
|  |  | |||
|  | @ -16,6 +16,7 @@ namespace LSFG_3_1P { | |||
|     /// @param isHdr Whether the images are in HDR format.
 | ||||
|     /// @param flowScale Internal flow scale factor.
 | ||||
|     /// @param generationCount Number of frames to generate.
 | ||||
|     /// @param forceDisableFp16 Whether to force-disable FP16 optimizations.
 | ||||
|     /// @param loader Function to load shader source code by name.
 | ||||
|     ///
 | ||||
|     /// @throws LSFG::vulkan_error if Vulkan objects fail to initialize.
 | ||||
|  | @ -23,8 +24,10 @@ namespace LSFG_3_1P { | |||
|     [[gnu::visibility("default")]] | ||||
|     void initialize(uint64_t deviceUUID, | ||||
|         bool isHdr, float flowScale, uint64_t generationCount, | ||||
|         const std::function<std::vector<uint8_t>(const std::string&)>& loader); | ||||
|         bool forceDisableFp16, | ||||
|         const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader); | ||||
| 
 | ||||
| #ifdef LSFGVK_EXCESS_DEBUG | ||||
|     ///
 | ||||
|     /// Initialize the renderdoc API.
 | ||||
|     ///
 | ||||
|  | @ -32,6 +35,7 @@ namespace LSFG_3_1P { | |||
|     ///
 | ||||
|     [[gnu::visibility("default")]] | ||||
|     void initializeRenderDoc(); | ||||
| #endif // LSFGVK_EXCESS_DEBUG
 | ||||
| 
 | ||||
|     ///
 | ||||
|     /// Create a new LSFG context on a swapchain.
 | ||||
|  |  | |||
|  | @ -12,8 +12,9 @@ | |||
| #include "core/buffer.hpp" | ||||
| #include "common/exception.hpp" | ||||
| 
 | ||||
| #include <memory> | ||||
| #include <cstddef> | ||||
| #include <cstdint> | ||||
| #include <memory> | ||||
| 
 | ||||
| using namespace LSFG::Core; | ||||
| 
 | ||||
|  | @ -55,10 +56,11 @@ void DescriptorSet::bind(const CommandBuffer& commandBuffer, const Pipeline& pip | |||
| // updater class
 | ||||
| 
 | ||||
| DescriptorSetUpdateBuilder& DescriptorSetUpdateBuilder::add(VkDescriptorType type, const Image& image) { | ||||
|     size_t* idx{type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ? &this->outputIdx : &this->inputIdx}; | ||||
|     this->entries.push_back({ | ||||
|         .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | ||||
|         .dstSet = this->descriptorSet->handle(), | ||||
|         .dstBinding = static_cast<uint32_t>(this->entries.size()), | ||||
|         .dstBinding = static_cast<uint32_t>(*idx), | ||||
|         .descriptorCount = 1, | ||||
|         .descriptorType = type, | ||||
|         .pImageInfo = new VkDescriptorImageInfo { | ||||
|  | @ -67,6 +69,7 @@ DescriptorSetUpdateBuilder& DescriptorSetUpdateBuilder::add(VkDescriptorType typ | |||
|         }, | ||||
|         .pBufferInfo = nullptr | ||||
|     }); | ||||
|     (*idx)++; | ||||
|     return *this; | ||||
| } | ||||
| 
 | ||||
|  | @ -74,7 +77,7 @@ DescriptorSetUpdateBuilder& DescriptorSetUpdateBuilder::add(VkDescriptorType typ | |||
|     this->entries.push_back({ | ||||
|         .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | ||||
|         .dstSet = this->descriptorSet->handle(), | ||||
|         .dstBinding = static_cast<uint32_t>(this->entries.size()), | ||||
|         .dstBinding = static_cast<uint32_t>(this->samplerIdx++), | ||||
|         .descriptorCount = 1, | ||||
|         .descriptorType = type, | ||||
|         .pImageInfo = new VkDescriptorImageInfo { | ||||
|  | @ -89,7 +92,7 @@ DescriptorSetUpdateBuilder& DescriptorSetUpdateBuilder::add(VkDescriptorType typ | |||
|     this->entries.push_back({ | ||||
|         .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | ||||
|         .dstSet = this->descriptorSet->handle(), | ||||
|         .dstBinding = static_cast<uint32_t>(this->entries.size()), | ||||
|         .dstBinding = static_cast<uint32_t>(this->bufferIdx++), | ||||
|         .descriptorCount = 1, | ||||
|         .descriptorType = type, | ||||
|         .pImageInfo = nullptr, | ||||
|  | @ -102,16 +105,34 @@ DescriptorSetUpdateBuilder& DescriptorSetUpdateBuilder::add(VkDescriptorType typ | |||
| } | ||||
| 
 | ||||
| DescriptorSetUpdateBuilder& DescriptorSetUpdateBuilder::add(VkDescriptorType type) { | ||||
|     size_t* idx{}; | ||||
|     switch (type) { | ||||
|         case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: | ||||
|             idx = &this->inputIdx; | ||||
|             break; | ||||
|         case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: | ||||
|             idx = &this->outputIdx; | ||||
|             break; | ||||
|         case VK_DESCRIPTOR_TYPE_SAMPLER: | ||||
|             idx = &this->samplerIdx; | ||||
|             break; | ||||
|         case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: | ||||
|             idx = &this->bufferIdx; | ||||
|             break; | ||||
|         default: | ||||
|             throw LSFG::vulkan_error(VK_ERROR_UNKNOWN, "Unsupported descriptor type"); | ||||
|     } | ||||
|     this->entries.push_back({ | ||||
|         .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, | ||||
|         .dstSet = this->descriptorSet->handle(), | ||||
|         .dstBinding = static_cast<uint32_t>(this->entries.size()), | ||||
|         .dstBinding = static_cast<uint32_t>(*idx), | ||||
|         .descriptorCount = 1, | ||||
|         .descriptorType = type, | ||||
|         .pImageInfo = new VkDescriptorImageInfo { | ||||
|         }, | ||||
|         .pBufferInfo = nullptr | ||||
|     }); | ||||
|     (*idx)++; | ||||
|     return *this; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,3 +1,4 @@ | |||
| #include <iostream> | ||||
| #include <volk.h> | ||||
| #include <vulkan/vulkan_core.h> | ||||
| 
 | ||||
|  | @ -15,10 +16,10 @@ using namespace LSFG::Core; | |||
| const std::vector<const char*> requiredExtensions = { | ||||
|     "VK_KHR_external_memory_fd", | ||||
|     "VK_KHR_external_semaphore_fd", | ||||
|     "VK_EXT_robustness2", | ||||
|     "VK_EXT_robustness2" | ||||
| }; | ||||
| 
 | ||||
| Device::Device(const Instance& instance, uint64_t deviceUUID) { | ||||
| Device::Device(const Instance& instance, uint64_t deviceUUID, bool forceDisableFp16) { | ||||
|     // get all physical devices
 | ||||
|     uint32_t deviceCount{}; | ||||
|     auto res = vkEnumeratePhysicalDevices(instance.handle(), &deviceCount, nullptr); | ||||
|  | @ -62,11 +63,26 @@ Device::Device(const Instance& instance, uint64_t deviceUUID) { | |||
|     if (!computeFamilyIdx) | ||||
|         throw LSFG::vulkan_error(VK_ERROR_INITIALIZATION_FAILED, "No compute queue family found"); | ||||
| 
 | ||||
|     // check if physical device supports float16
 | ||||
|     VkPhysicalDeviceVulkan12Features supported12Features{ | ||||
|         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES | ||||
|     }; | ||||
|     VkPhysicalDeviceFeatures2 supportedFeatures{ | ||||
|         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, | ||||
|         .pNext = &supported12Features | ||||
|     }; | ||||
|     vkGetPhysicalDeviceFeatures2(*physicalDevice, &supportedFeatures); | ||||
|     this->supportsFP16 = !forceDisableFp16 && supported12Features.shaderFloat16; | ||||
|     if (this->supportsFP16) | ||||
|         std::cerr << "lsfg-vk: Using FP16 acceleration" << '\n'; | ||||
|     else if (!forceDisableFp16) | ||||
|         std::cerr << "lsfg-vk: FP16 acceleration not supported, using FP32" << '\n'; | ||||
| 
 | ||||
|     // create logical device
 | ||||
|     const float queuePriority{1.0F}; // highest priority
 | ||||
|     VkPhysicalDeviceRobustness2FeaturesEXT robustness2{ | ||||
|         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT, | ||||
|         .nullDescriptor = VK_TRUE, | ||||
|         .nullDescriptor = VK_TRUE | ||||
|     }; | ||||
|     VkPhysicalDeviceVulkan13Features features13{ | ||||
|         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES, | ||||
|  | @ -76,6 +92,7 @@ Device::Device(const Instance& instance, uint64_t deviceUUID) { | |||
|     const VkPhysicalDeviceVulkan12Features features12{ | ||||
|         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES, | ||||
|         .pNext = &features13, | ||||
|         .shaderFloat16 = this->supportsFP16, | ||||
|         .timelineSemaphore = VK_TRUE, | ||||
|         .vulkanMemoryModel = VK_TRUE | ||||
|     }; | ||||
|  |  | |||
|  | @ -29,16 +29,40 @@ ShaderModule::ShaderModule(const Core::Device& device, const std::vector<uint8_t | |||
| 
 | ||||
|     // create descriptor set layout
 | ||||
|     std::vector<VkDescriptorSetLayoutBinding> layoutBindings; | ||||
|     size_t bindIdx = 0; | ||||
|     size_t bufferIdx{0}; | ||||
|     size_t samplerIdx{16}; | ||||
|     size_t inputIdx{32}; | ||||
|     size_t outputIdx{48}; | ||||
|     for (const auto &[count, type] : descriptorTypes) | ||||
|         for (size_t i = 0; i < count; i++, bindIdx++) | ||||
|         for (size_t i = 0; i < count; i++) { | ||||
|             size_t* bindIdx{}; | ||||
|             switch (type) { | ||||
|                 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: | ||||
|                     bindIdx = &bufferIdx; | ||||
|                     break; | ||||
|                 case VK_DESCRIPTOR_TYPE_SAMPLER: | ||||
|                     bindIdx = &samplerIdx; | ||||
|                     break; | ||||
|                 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: | ||||
|                     bindIdx = &inputIdx; | ||||
|                     break; | ||||
|                 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: | ||||
|                     bindIdx = &outputIdx; | ||||
|                     break; | ||||
|                 default: | ||||
|                     throw LSFG::vulkan_error(VK_ERROR_UNKNOWN, "Unsupported descriptor type"); | ||||
|             } | ||||
| 
 | ||||
|             layoutBindings.emplace_back(VkDescriptorSetLayoutBinding { | ||||
|                 .binding = static_cast<uint32_t>(bindIdx), | ||||
|                 .binding = static_cast<uint32_t>(*bindIdx), | ||||
|                 .descriptorType = type, | ||||
|                 .descriptorCount = 1, | ||||
|                 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT | ||||
|             }); | ||||
| 
 | ||||
|             (*bindIdx)++; | ||||
|         } | ||||
| 
 | ||||
|     const VkDescriptorSetLayoutCreateInfo layoutDesc{ | ||||
|         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, | ||||
|         .bindingCount = static_cast<uint32_t>(layoutBindings.size()), | ||||
|  |  | |||
|  | @ -22,7 +22,7 @@ Core::ShaderModule ShaderPool::getShader( | |||
|         return it->second; | ||||
| 
 | ||||
|     // grab the shader
 | ||||
|     auto bytecode = this->source(name); | ||||
|     auto bytecode = this->source(name, this->fp16); | ||||
|     if (bytecode.empty()) | ||||
|         throw std::runtime_error("Shader code is empty: " + name); | ||||
| 
 | ||||
|  |  | |||
|  | @ -10,8 +10,10 @@ | |||
| #include "common/exception.hpp" | ||||
| #include "common/utils.hpp" | ||||
| 
 | ||||
| #ifdef LSFGVK_EXCESS_DEBUG | ||||
| #include <renderdoc_app.h> | ||||
| #include <dlfcn.h> | ||||
| #endif // LSFGVK_EXCESS_DEBUG
 | ||||
| 
 | ||||
| #include <cstdint> | ||||
| #include <optional> | ||||
|  | @ -30,18 +32,21 @@ namespace { | |||
|     std::optional<Vulkan> device; | ||||
|     std::unordered_map<int32_t, Context> contexts; | ||||
| 
 | ||||
| #ifdef LSFGVK_EXCESS_DEBUG | ||||
|     std::optional<RENDERDOC_API_1_6_0*> renderdoc; | ||||
| #endif // LSFGVK_EXCESS_DEBUG
 | ||||
| } | ||||
| 
 | ||||
| void LSFG_3_1::initialize(uint64_t deviceUUID, | ||||
|         bool isHdr, float flowScale, uint64_t generationCount, | ||||
|         const std::function<std::vector<uint8_t>(const std::string&)>& loader) { | ||||
|         bool forceDisableFp16, | ||||
|         const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader) { | ||||
|     if (instance.has_value() || device.has_value()) | ||||
|         return; | ||||
| 
 | ||||
|     instance.emplace(); | ||||
|     device.emplace(Vulkan { | ||||
|         .device{*instance, deviceUUID}, | ||||
|         .device{*instance, deviceUUID, forceDisableFp16}, | ||||
|         .generationCount = generationCount, | ||||
|         .flowScale = flowScale, | ||||
|         .isHdr = isHdr | ||||
|  | @ -52,11 +57,12 @@ void LSFG_3_1::initialize(uint64_t deviceUUID, | |||
|     device->descriptorPool = Core::DescriptorPool(device->device); | ||||
| 
 | ||||
|     device->resources = Pool::ResourcePool(device->isHdr, device->flowScale); | ||||
|     device->shaders = Pool::ShaderPool(loader); | ||||
|     device->shaders = Pool::ShaderPool(loader, device->device.getFP16Support()); | ||||
| 
 | ||||
|     std::srand(static_cast<uint32_t>(std::time(nullptr))); | ||||
| } | ||||
| 
 | ||||
| #ifdef LSFGVK_EXCESS_DEBUG | ||||
| void LSFG_3_1::initializeRenderDoc() { | ||||
|     if (renderdoc.has_value()) | ||||
|         return; | ||||
|  | @ -73,6 +79,7 @@ void LSFG_3_1::initializeRenderDoc() { | |||
|         throw LSFG::vulkan_error(VK_ERROR_INITIALIZATION_FAILED, "RenderDoc API not found"); | ||||
|     } | ||||
| } | ||||
| #endif // LSFGVK_EXCESS_DEBUG
 | ||||
| 
 | ||||
| int32_t LSFG_3_1::createContext( | ||||
|         int in0, int in1, const std::vector<int>& outN, | ||||
|  | @ -93,15 +100,19 @@ void LSFG_3_1::presentContext(int32_t id, int inSem, const std::vector<int>& out | |||
|     if (it == contexts.end()) | ||||
|         throw LSFG::vulkan_error(VK_ERROR_UNKNOWN, "Context not found"); | ||||
| 
 | ||||
| #ifdef LSFGVK_EXCESS_DEBUG | ||||
|     if (renderdoc.has_value()) | ||||
|         (*renderdoc)->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(instance->handle()), nullptr); | ||||
| #endif // LSFGVK_EXCESS_DEBUG
 | ||||
| 
 | ||||
|     it->second.present(*device, inSem, outSem); | ||||
| 
 | ||||
| #ifdef LSFGVK_EXCESS_DEBUG | ||||
|     if (renderdoc.has_value()) { | ||||
|         vkDeviceWaitIdle(device->device.handle()); | ||||
|         (*renderdoc)->EndFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(instance->handle()), nullptr); | ||||
|     } | ||||
| #endif // LSFGVK_EXCESS_DEBUG
 | ||||
| } | ||||
| 
 | ||||
| void LSFG_3_1::deleteContext(int32_t id) { | ||||
|  |  | |||
|  | @ -10,8 +10,10 @@ | |||
| #include "common/exception.hpp" | ||||
| #include "common/utils.hpp" | ||||
| 
 | ||||
| #ifdef LSFGVK_EXCESS_DEBUG | ||||
| #include <renderdoc_app.h> | ||||
| #include <dlfcn.h> | ||||
| #endif // LSFGVK_EXCESS_DEBUG
 | ||||
| 
 | ||||
| #include <cstdint> | ||||
| #include <optional> | ||||
|  | @ -30,18 +32,21 @@ namespace { | |||
|     std::optional<Vulkan> device; | ||||
|     std::unordered_map<int32_t, Context> contexts; | ||||
| 
 | ||||
| #ifdef LSFGVK_EXCESS_DEBUG | ||||
|     std::optional<RENDERDOC_API_1_6_0*> renderdoc; | ||||
| #endif // LSFGVK_EXCESS_DEBUG
 | ||||
| } | ||||
| 
 | ||||
| void LSFG_3_1P::initialize(uint64_t deviceUUID, | ||||
|         bool isHdr, float flowScale, uint64_t generationCount, | ||||
|         const std::function<std::vector<uint8_t>(const std::string&)>& loader) { | ||||
|         bool forceDisableFp16, | ||||
|         const std::function<std::vector<uint8_t>(const std::string&, bool)>& loader) { | ||||
|     if (instance.has_value() || device.has_value()) | ||||
|         return; | ||||
| 
 | ||||
|     instance.emplace(); | ||||
|     device.emplace(Vulkan { | ||||
|         .device{*instance, deviceUUID}, | ||||
|         .device{*instance, deviceUUID, forceDisableFp16}, | ||||
|         .generationCount = generationCount, | ||||
|         .flowScale = flowScale, | ||||
|         .isHdr = isHdr | ||||
|  | @ -52,11 +57,12 @@ void LSFG_3_1P::initialize(uint64_t deviceUUID, | |||
|     device->descriptorPool = Core::DescriptorPool(device->device); | ||||
| 
 | ||||
|     device->resources = Pool::ResourcePool(device->isHdr, device->flowScale); | ||||
|     device->shaders = Pool::ShaderPool(loader); | ||||
|     device->shaders = Pool::ShaderPool(loader, device->device.getFP16Support()); | ||||
| 
 | ||||
|     std::srand(static_cast<uint32_t>(std::time(nullptr))); | ||||
| } | ||||
| 
 | ||||
| #ifdef LSFGVK_EXCESS_DEBUG | ||||
| void LSFG_3_1P::initializeRenderDoc() { | ||||
|     if (renderdoc.has_value()) | ||||
|         return; | ||||
|  | @ -73,6 +79,7 @@ void LSFG_3_1P::initializeRenderDoc() { | |||
|         throw LSFG::vulkan_error(VK_ERROR_INITIALIZATION_FAILED, "RenderDoc API not found"); | ||||
|     } | ||||
| } | ||||
| #endif // LSFGVK_EXCESS_DEBUG
 | ||||
| 
 | ||||
| int32_t LSFG_3_1P::createContext( | ||||
|         int in0, int in1, const std::vector<int>& outN, | ||||
|  | @ -93,15 +100,19 @@ void LSFG_3_1P::presentContext(int32_t id, int inSem, const std::vector<int>& ou | |||
|     if (it == contexts.end()) | ||||
|         throw LSFG::vulkan_error(VK_ERROR_UNKNOWN, "Context not found"); | ||||
| 
 | ||||
| #ifdef LSFGVK_EXCESS_DEBUG | ||||
|     if (renderdoc.has_value()) | ||||
|         (*renderdoc)->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(instance->handle()), nullptr); | ||||
| #endif // LSFGVK_EXCESS_DEBUG
 | ||||
| 
 | ||||
|     it->second.present(*device, inSem, outSem); | ||||
| 
 | ||||
| #ifdef LSFGVK_EXCESS_DEBUG | ||||
|     if (renderdoc.has_value()) { | ||||
|         vkDeviceWaitIdle(device->device.handle()); | ||||
|         (*renderdoc)->EndFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(instance->handle()), nullptr); | ||||
|     } | ||||
| #endif // LSFGVK_EXCESS_DEBUG
 | ||||
| } | ||||
| 
 | ||||
| void LSFG_3_1P::deleteContext(int32_t id) { | ||||
|  |  | |||
|  | @ -21,12 +21,12 @@ Generate::Generate(Vulkan& vk, | |||
|           inImg3(std::move(inImg3)), inImg4(std::move(inImg4)), | ||||
|           inImg5(std::move(inImg5)) { | ||||
|     // create resources
 | ||||
|     this->shaderModule = vk.shaders.getShader(vk.device, "p_generate", | ||||
|     this->shaderModule = vk.shaders.getShader(vk.device, "generate", | ||||
|         { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER }, | ||||
|           { 2, VK_DESCRIPTOR_TYPE_SAMPLER }, | ||||
|           { 5, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, | ||||
|           { 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }); | ||||
|     this->pipeline = vk.shaders.getPipeline(vk.device, "p_generate"); | ||||
|     this->pipeline = vk.shaders.getPipeline(vk.device, "generate"); | ||||
|     this->samplers.at(0) = vk.resources.getSampler(vk.device); | ||||
|     this->samplers.at(1) = vk.resources.getSampler(vk.device, | ||||
|         VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_COMPARE_OP_ALWAYS); | ||||
|  |  | |||
|  | @ -16,12 +16,12 @@ Mipmaps::Mipmaps(Vulkan& vk, | |||
|         Core::Image inImg_0, Core::Image inImg_1) | ||||
|         : inImg_0(std::move(inImg_0)), inImg_1(std::move(inImg_1)) { | ||||
|     // create resources
 | ||||
|     this->shaderModule = vk.shaders.getShader(vk.device, "p_mipmaps", | ||||
|     this->shaderModule = vk.shaders.getShader(vk.device, "mipmaps", | ||||
|         { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER }, | ||||
|           { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, | ||||
|           { 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, | ||||
|           { 7, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }); | ||||
|     this->pipeline = vk.shaders.getPipeline(vk.device, "p_mipmaps"); | ||||
|     this->pipeline = vk.shaders.getPipeline(vk.device, "mipmaps"); | ||||
|     this->buffer = vk.resources.getBuffer(vk.device); | ||||
|     this->sampler = vk.resources.getSampler(vk.device); | ||||
|     for (size_t i = 0; i < 2; i++) | ||||
|  |  | |||
|  | @ -15,6 +15,8 @@ namespace Config { | |||
|         bool enable{false}; | ||||
|         /// Path to Lossless.dll.
 | ||||
|         std::string dll; | ||||
|         /// Whether FP16 is force-disabled
 | ||||
|         bool no_fp16{false}; | ||||
| 
 | ||||
|         /// The frame generation muliplier
 | ||||
|         size_t multiplier{2}; | ||||
|  |  | |||
|  | @ -6,6 +6,8 @@ const std::string DEFAULT_CONFIG = R"(version = 1 | |||
| [global] | ||||
| # override the location of Lossless Scaling | ||||
| # dll = "/games/Lossless Scaling/Lossless.dll" | ||||
| # force-disable fp16 (use on older nvidia cards) | ||||
| # no_fp16 = true | ||||
| 
 | ||||
| # [[game]] # example entry | ||||
| # exe = "Game.exe" | ||||
|  | @ -30,7 +32,7 @@ multiplier = 4 | |||
| performance_mode = false | ||||
| 
 | ||||
| [[game]] # override Genshin Impact | ||||
| exe = "Genshin" | ||||
| exe = "GenshinImpact.exe" | ||||
| 
 | ||||
| multiplier = 3 | ||||
| )"; | ||||
|  |  | |||
|  | @ -17,10 +17,11 @@ namespace Extract { | |||
|     /// Get a shader by name.
 | ||||
|     ///
 | ||||
|     /// @param name The name of the shader to get.
 | ||||
|     /// @param fp16 If true, use the FP16 variant of shaders.
 | ||||
|     /// @return The shader bytecode.
 | ||||
|     ///
 | ||||
|     /// @throws std::runtime_error if the shader is not found.
 | ||||
|     ///
 | ||||
|     std::vector<uint8_t> getShader(const std::string& name); | ||||
|     std::vector<uint8_t> getShader(const std::string& name, bool fp16); | ||||
| 
 | ||||
| } | ||||
|  |  | |||
|  | @ -1,16 +0,0 @@ | |||
| #pragma once | ||||
| 
 | ||||
| #include <cstdint> | ||||
| #include <vector> | ||||
| 
 | ||||
| namespace Extract { | ||||
| 
 | ||||
|     ///
 | ||||
|     /// Translate DXBC bytecode to SPIR-V bytecode.
 | ||||
|     ///
 | ||||
|     /// @param bytecode The DXBC bytecode to translate.
 | ||||
|     /// @return The translated SPIR-V bytecode.
 | ||||
|     ///
 | ||||
|     std::vector<uint8_t> translateShader(std::vector<uint8_t> bytecode); | ||||
| 
 | ||||
| } | ||||
|  | @ -73,7 +73,8 @@ void Config::updateConfig(const std::string& file) { | |||
|     // parse global configuration
 | ||||
|     const toml::value globalTable = toml::find_or_default<toml::table>(toml, "global"); | ||||
|     const Configuration global{ | ||||
|         .dll =   toml::find_or(globalTable, "dll", std::string()), | ||||
|         .dll =     toml::find_or(globalTable, "dll", std::string()), | ||||
|         .no_fp16 = toml::find_or(globalTable, "no_fp16", false), | ||||
|         .config_file = file, | ||||
|         .timestamp = std::filesystem::last_write_time(file) | ||||
|     }; | ||||
|  | @ -97,6 +98,7 @@ void Config::updateConfig(const std::string& file) { | |||
|         Configuration game{ | ||||
|             .enable = true, | ||||
|             .dll = global.dll, | ||||
|             .no_fp16 = global.no_fp16, | ||||
|             .multiplier = toml::find_or(gameTable, "multiplier", 2U), | ||||
|             .flowScale = toml::find_or(gameTable, "flow_scale", 1.0F), | ||||
|             .performance = toml::find_or(gameTable, "performance_mode", false), | ||||
|  |  | |||
|  | @ -2,7 +2,6 @@ | |||
| #include "config/config.hpp" | ||||
| #include "common/exception.hpp" | ||||
| #include "extract/extract.hpp" | ||||
| #include "extract/trans.hpp" | ||||
| #include "utils/utils.hpp" | ||||
| #include "hooks.hpp" | ||||
| #include "layer.hpp" | ||||
|  | @ -52,8 +51,9 @@ LsContext::LsContext(const Hooks::DeviceInfo& info, VkSwapchainKHR swapchain, | |||
|         LSFG_3_1::finalize(); | ||||
| 
 | ||||
|         // print config
 | ||||
|         std::cerr << "lsfg-vk: Reloaded configuration for " << name.second << ":\n"; | ||||
|         std::cerr << "lsfg-vk: Reloaded configuration for " << name.first << ":\n"; | ||||
|         if (!conf.dll.empty()) std::cerr << "  Using DLL from: " << conf.dll << '\n'; | ||||
|         if (conf.no_fp16) std::cerr << "  FP16 Acceleration: Force-disabled\n"; | ||||
|         std::cerr << "  Multiplier: " << conf.multiplier << '\n'; | ||||
|         std::cerr << "  Flow Scale: " << conf.flowScale << '\n'; | ||||
|         std::cerr << "  Performance Mode: " << (conf.performance ? "Enabled" : "Disabled") << '\n'; | ||||
|  | @ -99,11 +99,8 @@ LsContext::LsContext(const Hooks::DeviceInfo& info, VkSwapchainKHR swapchain, | |||
|     lsfgInitialize( | ||||
|         Utils::getDeviceUUID(info.physicalDevice), | ||||
|         conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1, | ||||
|         [](const std::string& name) { | ||||
|             auto dxbc = Extract::getShader(name); | ||||
|             auto spirv = Extract::translateShader(dxbc); | ||||
|             return spirv; | ||||
|         } | ||||
|         conf.no_fp16, | ||||
|         Extract::getShader | ||||
|     ); | ||||
| 
 | ||||
|     this->lsfgCtxId = std::shared_ptr<int32_t>( | ||||
|  |  | |||
|  | @ -3,84 +3,89 @@ | |||
| 
 | ||||
| #include <pe-parse/parse.h> | ||||
| 
 | ||||
| #include <cstdlib> | ||||
| #include <unordered_map> | ||||
| #include <filesystem> | ||||
| #include <algorithm> | ||||
| #include <cstdint> | ||||
| #include <stdexcept> | ||||
| #include <cstdint> | ||||
| #include <cstdlib> | ||||
| #include <string> | ||||
| #include <unordered_map> | ||||
| #include <utility> | ||||
| #include <vector> | ||||
| #include <array> | ||||
| 
 | ||||
| using namespace Extract; | ||||
| 
 | ||||
| const uint32_t NO = 49; // native offset
 | ||||
| const uint32_t PO = NO + 23; // performance+native offset
 | ||||
| const uint32_t FP = 49; // fp32 offset
 | ||||
| const std::unordered_map<std::string, uint32_t> nameIdxTable = {{ | ||||
|     { "mipmaps", 255 }, | ||||
|     { "alpha[0]", 267 }, | ||||
|     { "alpha[1]", 268 }, | ||||
|     { "alpha[2]", 269 }, | ||||
|     { "alpha[3]", 270 }, | ||||
|     { "beta[0]", 275 }, | ||||
|     { "beta[1]", 276 }, | ||||
|     { "beta[2]", 277 }, | ||||
|     { "beta[3]", 278 }, | ||||
|     { "beta[4]", 279 }, | ||||
|     { "gamma[0]", 257 }, | ||||
|     { "gamma[1]", 259 }, | ||||
|     { "gamma[2]", 260 }, | ||||
|     { "gamma[3]", 261 }, | ||||
|     { "gamma[4]", 262 }, | ||||
|     { "delta[0]", 257 }, | ||||
|     { "delta[1]", 263 }, | ||||
|     { "delta[2]", 264 }, | ||||
|     { "delta[3]", 265 }, | ||||
|     { "delta[4]", 266 }, | ||||
|     { "delta[5]", 258 }, | ||||
|     { "delta[6]", 271 }, | ||||
|     { "delta[7]", 272 }, | ||||
|     { "delta[8]", 273 }, | ||||
|     { "delta[9]", 274 }, | ||||
|     { "generate", 256 }, | ||||
|     { "p_mipmaps", 255 }, | ||||
|     { "p_alpha[0]", 290 }, | ||||
|     { "p_alpha[1]", 291 }, | ||||
|     { "p_alpha[2]", 292 }, | ||||
|     { "p_alpha[3]", 293 }, | ||||
|     { "p_beta[0]", 298 }, | ||||
|     { "p_beta[1]", 299 }, | ||||
|     { "p_beta[2]", 300 }, | ||||
|     { "p_beta[3]", 301 }, | ||||
|     { "p_beta[4]", 302 }, | ||||
|     { "p_gamma[0]", 280 }, | ||||
|     { "p_gamma[1]", 282 }, | ||||
|     { "p_gamma[2]", 283 }, | ||||
|     { "p_gamma[3]", 284 }, | ||||
|     { "p_gamma[4]", 285 }, | ||||
|     { "p_delta[0]", 280 }, | ||||
|     { "p_delta[1]", 286 }, | ||||
|     { "p_delta[2]", 287 }, | ||||
|     { "p_delta[3]", 288 }, | ||||
|     { "p_delta[4]", 289 }, | ||||
|     { "p_delta[5]", 281 }, | ||||
|     { "p_delta[6]", 294 }, | ||||
|     { "p_delta[7]", 295 }, | ||||
|     { "p_delta[8]", 296 }, | ||||
|     { "p_delta[9]", 297 }, | ||||
|     { "p_generate", 256 }, | ||||
|     { "mipmaps",  255 + NO }, | ||||
|     { "alpha[0]", 267 + NO }, | ||||
|     { "alpha[1]", 268 + NO }, | ||||
|     { "alpha[2]", 269 + NO }, | ||||
|     { "alpha[3]", 270 + NO }, | ||||
|     { "beta[0]",  275 + NO }, | ||||
|     { "beta[1]",  276 + NO }, | ||||
|     { "beta[2]",  277 + NO }, | ||||
|     { "beta[3]",  278 + NO }, | ||||
|     { "beta[4]",  279 + NO }, | ||||
|     { "gamma[0]", 257 + NO }, | ||||
|     { "gamma[1]", 259 + NO }, | ||||
|     { "gamma[2]", 260 + NO }, | ||||
|     { "gamma[3]", 261 + NO }, | ||||
|     { "gamma[4]", 262 + NO }, | ||||
|     { "delta[0]", 257 + NO }, | ||||
|     { "delta[1]", 263 + NO }, | ||||
|     { "delta[2]", 264 + NO }, | ||||
|     { "delta[3]", 265 + NO }, | ||||
|     { "delta[4]", 266 + NO }, | ||||
|     { "delta[5]", 258 + NO }, | ||||
|     { "delta[6]", 271 + NO }, | ||||
|     { "delta[7]", 272 + NO }, | ||||
|     { "delta[8]", 273 + NO }, | ||||
|     { "delta[9]", 274 + NO }, | ||||
|     { "generate", 256 + NO }, | ||||
|     { "p_alpha[0]", 267 + PO }, | ||||
|     { "p_alpha[1]", 268 + PO }, | ||||
|     { "p_alpha[2]", 269 + PO }, | ||||
|     { "p_alpha[3]", 270 + PO }, | ||||
|     { "p_beta[0]",  275 + PO }, | ||||
|     { "p_beta[1]",  276 + PO }, | ||||
|     { "p_beta[2]",  277 + PO }, | ||||
|     { "p_beta[3]",  278 + PO }, | ||||
|     { "p_beta[4]",  279 + PO }, | ||||
|     { "p_gamma[0]", 257 + PO }, | ||||
|     { "p_gamma[1]", 259 + PO }, | ||||
|     { "p_gamma[2]", 260 + PO }, | ||||
|     { "p_gamma[3]", 261 + PO }, | ||||
|     { "p_gamma[4]", 262 + PO }, | ||||
|     { "p_delta[0]", 257 + PO }, | ||||
|     { "p_delta[1]", 263 + PO }, | ||||
|     { "p_delta[2]", 264 + PO }, | ||||
|     { "p_delta[3]", 265 + PO }, | ||||
|     { "p_delta[4]", 266 + PO }, | ||||
|     { "p_delta[5]", 258 + PO }, | ||||
|     { "p_delta[6]", 271 + PO }, | ||||
|     { "p_delta[7]", 272 + PO }, | ||||
|     { "p_delta[8]", 273 + PO }, | ||||
|     { "p_delta[9]", 274 + PO }, | ||||
| }}; | ||||
| 
 | ||||
| namespace { | ||||
|     auto& shaders() { | ||||
|         static std::unordered_map<uint32_t, std::vector<uint8_t>> shaderData; | ||||
|     auto& pshaders() { | ||||
|         static std::unordered_map<uint32_t, std::array<std::vector<uint8_t>, 2>> shaderData; | ||||
|         return shaderData; | ||||
|     } | ||||
| 
 | ||||
|     int on_resource(void*, const peparse::resource& res) { | ||||
|     int on_resource(void* ptr, const peparse::resource& res) { | ||||
|         if (res.type != peparse::RT_RCDATA || res.buf == nullptr || res.buf->bufLen <= 0) | ||||
|             return 0; | ||||
|         std::vector<uint8_t> resource_data(res.buf->bufLen); | ||||
|         std::copy_n(res.buf->buf, res.buf->bufLen, resource_data.data()); | ||||
|         shaders()[res.name] = resource_data; | ||||
| 
 | ||||
|         auto* shaders = reinterpret_cast<std::unordered_map<uint32_t, std::vector<uint8_t>>*>(ptr); | ||||
|         shaders->emplace(res.name, std::move(resource_data)); | ||||
|         return 0; | ||||
|     } | ||||
| 
 | ||||
|  | @ -116,33 +121,44 @@ namespace { | |||
| } | ||||
| 
 | ||||
| void Extract::extractShaders() { | ||||
|     if (!shaders().empty()) | ||||
|     if (!pshaders().empty()) | ||||
|         return; | ||||
| 
 | ||||
|     std::unordered_map<uint32_t, std::vector<uint8_t>> shaders{}; | ||||
| 
 | ||||
|     // parse the dll
 | ||||
|     peparse::parsed_pe* dll = peparse::ParsePEFromFile(getDllPath().c_str()); | ||||
|     if (!dll) | ||||
|         throw std::runtime_error("Unable to read Lossless.dll, is it installed?"); | ||||
|     peparse::IterRsrc(dll, on_resource, nullptr); | ||||
|     peparse::IterRsrc(dll, on_resource, reinterpret_cast<void*>(&shaders)); | ||||
|     peparse::DestructParsedPE(dll); | ||||
| 
 | ||||
|     // ensure all shaders are present
 | ||||
|     for (const auto& [name, idx] : nameIdxTable) | ||||
|         if (shaders().find(idx) == shaders().end()) | ||||
|             throw std::runtime_error("Shader not found: " + name + ".\n- Is Lossless Scaling up to date?"); | ||||
|     for (const auto& [name, idx] : nameIdxTable) { | ||||
|         auto fp16 = shaders.find(idx); | ||||
|         if (fp16 == shaders.end()) | ||||
|             throw std::runtime_error("Shader not found: " + name + " (FP16).\n- Is Lossless Scaling up to date?"); | ||||
|         auto fp32 = shaders.find(idx + FP); | ||||
|         if (fp32 == shaders.end()) | ||||
|             throw std::runtime_error("Shader not found: " + name + " (FP32).\n- Is Lossless Scaling up to date?"); | ||||
| 
 | ||||
|         pshaders().emplace(idx, std::array<std::vector<uint8_t>, 2>{ | ||||
|             std::move(fp32->second), | ||||
|             std::move(fp16->second) | ||||
|         }); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| std::vector<uint8_t> Extract::getShader(const std::string& name) { | ||||
|     if (shaders().empty()) | ||||
| std::vector<uint8_t> Extract::getShader(const std::string& name, bool fp16) { | ||||
|     if (pshaders().empty()) | ||||
|         throw std::runtime_error("Shaders are not loaded."); | ||||
| 
 | ||||
|     auto hit = nameIdxTable.find(name); | ||||
|     if (hit == nameIdxTable.end()) | ||||
|         throw std::runtime_error("Shader hash not found: " + name); | ||||
| 
 | ||||
|     auto sit = shaders().find(hit->second); | ||||
|     if (sit == shaders().end()) | ||||
|     auto sit = pshaders().find(hit->second); | ||||
|     if (sit == pshaders().end()) | ||||
|         throw std::runtime_error("Shader not found: " + name); | ||||
| 
 | ||||
|     return sit->second; | ||||
|     return fp16 ? sit->second.at(1) : sit->second.at(0); | ||||
| } | ||||
|  |  | |||
|  | @ -1,76 +0,0 @@ | |||
| #include "extract/trans.hpp" | ||||
| 
 | ||||
| #include <thirdparty/spirv.hpp> | ||||
| 
 | ||||
| #include <dxbc_modinfo.h> | ||||
| #include <dxbc_module.h> | ||||
| #include <dxbc_reader.h> | ||||
| 
 | ||||
| #include <cstdint> | ||||
| #include <cstddef> | ||||
| #include <algorithm> | ||||
| #include <vector> | ||||
| 
 | ||||
| using namespace Extract; | ||||
| 
 | ||||
| struct BindingOffsets { | ||||
|   uint32_t bindingIndex{}; | ||||
|   uint32_t bindingOffset{}; | ||||
|   uint32_t setIndex{}; | ||||
|   uint32_t setOffset{}; | ||||
| }; | ||||
| 
 | ||||
| std::vector<uint8_t> Extract::translateShader(std::vector<uint8_t> bytecode) { | ||||
|     // compile the shader
 | ||||
|     dxvk::DxbcReader reader(reinterpret_cast<const char*>(bytecode.data()), bytecode.size()); | ||||
|     dxvk::DxbcModule module(reader); | ||||
|     const dxvk::DxbcModuleInfo info{}; | ||||
|     auto code = module.compile(info, "CS"); | ||||
| 
 | ||||
|     // find all bindings
 | ||||
|     std::vector<BindingOffsets> bindingOffsets; | ||||
|     std::vector<uint32_t> varIds; | ||||
|     for (auto ins : code) { | ||||
|         if (ins.opCode() == spv::OpDecorate) { | ||||
|             if (ins.arg(2) == spv::DecorationBinding) { | ||||
|                 const uint32_t varId = ins.arg(1); | ||||
|                 bindingOffsets.resize(std::max(bindingOffsets.size(), size_t(varId + 1))); | ||||
|                 bindingOffsets[varId].bindingIndex = ins.arg(3); | ||||
|                 bindingOffsets[varId].bindingOffset = ins.offset() + 3; | ||||
|                 varIds.push_back(varId); | ||||
|             } | ||||
| 
 | ||||
|             if (ins.arg(2) == spv::DecorationDescriptorSet) { | ||||
|                 const uint32_t varId = ins.arg(1); | ||||
|                 bindingOffsets.resize(std::max(bindingOffsets.size(), size_t(varId + 1))); | ||||
|                 bindingOffsets[varId].setIndex = ins.arg(3); | ||||
|                 bindingOffsets[varId].setOffset = ins.offset() + 3; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         if (ins.opCode() == spv::OpFunction) | ||||
|             break; | ||||
|     } | ||||
| 
 | ||||
|     std::vector<BindingOffsets> validBindings; | ||||
|     for (const auto varId : varIds) { | ||||
|         auto info = bindingOffsets[varId]; | ||||
| 
 | ||||
|         if (info.bindingOffset) | ||||
|             validBindings.push_back(info); | ||||
|     } | ||||
| 
 | ||||
|     // patch binding offset
 | ||||
|     #pragma clang diagnostic push | ||||
|     #pragma clang diagnostic ignored "-Wunsafe-buffer-usage" | ||||
|         for (size_t i = 0; i < validBindings.size(); i++) | ||||
|             code.data()[validBindings.at(i).bindingOffset] // NOLINT
 | ||||
|                 = static_cast<uint8_t>(i); | ||||
|     #pragma clang diagnostic pop | ||||
| 
 | ||||
|     // return the new bytecode
 | ||||
|     std::vector<uint8_t> spirvBytecode(code.size()); | ||||
|     std::copy_n(reinterpret_cast<uint8_t*>(code.data()), | ||||
|         code.size(), spirvBytecode.data()); | ||||
|     return spirvBytecode; | ||||
| } | ||||
							
								
								
									
										24
									
								
								src/main.cpp
									
										
									
									
									
								
							
							
						
						
									
										24
									
								
								src/main.cpp
									
										
									
									
									
								
							|  | @ -3,10 +3,7 @@ | |||
| #include "utils/benchmark.hpp" | ||||
| #include "utils/utils.hpp" | ||||
| 
 | ||||
| #include <unistd.h> | ||||
| 
 | ||||
| #include <exception> | ||||
| #include <fstream> | ||||
| #include <stdexcept> | ||||
| #include <iostream> | ||||
| #include <cstdint> | ||||
|  | @ -34,7 +31,7 @@ namespace { | |||
|         try { | ||||
|             Config::activeConf = Config::getConfig(name); | ||||
|         } catch (const std::exception& e) { | ||||
|             std::cerr << "lsfg-vk: The configuration for " << name.second << " is invalid, IGNORING:\n"; | ||||
|             std::cerr << "lsfg-vk: The configuration for " << name.first << " is invalid, IGNORING:\n"; | ||||
|             std::cerr << e.what() << '\n'; | ||||
|             return; // default configuration will unload
 | ||||
|         } | ||||
|  | @ -45,8 +42,9 @@ namespace { | |||
|             return; // default configuration will unload
 | ||||
| 
 | ||||
|         // print config
 | ||||
|         std::cerr << "lsfg-vk: Loaded configuration for " << name.second << ":\n"; | ||||
|         std::cerr << "lsfg-vk: Loaded configuration for " << name.first << ":\n"; | ||||
|         if (!conf.dll.empty()) std::cerr << "  Using DLL from: " << conf.dll << '\n'; | ||||
|         if (conf.no_fp16) std::cerr << "  FP16 Acceleration: Force-disabled\n"; | ||||
|         std::cerr << "  Multiplier: " << conf.multiplier << '\n'; | ||||
|         std::cerr << "  Flow Scale: " << conf.flowScale << '\n'; | ||||
|         std::cerr << "  Performance Mode: " << (conf.performance ? "Enabled" : "Disabled") << '\n'; | ||||
|  | @ -56,22 +54,6 @@ namespace { | |||
|         // remove mesa var in favor of config
 | ||||
|         unsetenv("MESA_VK_WSI_PRESENT_MODE"); // NOLINT
 | ||||
| 
 | ||||
|         // write latest file
 | ||||
|         try { | ||||
|             std::ofstream latest("/tmp/lsfg-vk_last", std::ios::trunc); | ||||
|             if (!latest.is_open()) | ||||
|                 throw std::runtime_error("Failed to open /tmp/lsfg-vk_last for writing"); | ||||
|             latest << "exe: " << name.first << '\n'; | ||||
|             latest << "comm: " << name.second << '\n'; | ||||
|             latest << "pid: " << getpid() << '\n'; | ||||
|             if (!latest.good()) | ||||
|                 throw std::runtime_error("Failed to write to /tmp/lsfg-vk_last"); | ||||
|         } catch (const std::exception& e) { | ||||
|             std::cerr << "lsfg-vk: An error occurred while trying to write the latest file, exiting:\n"; | ||||
|             std::cerr << "- " << e.what() << '\n'; | ||||
|             exit(EXIT_FAILURE); | ||||
|         } | ||||
| 
 | ||||
|         // load shaders
 | ||||
|         try { | ||||
|             Extract::extractShaders(); | ||||
|  |  | |||
|  | @ -1,7 +1,6 @@ | |||
| #include "utils/benchmark.hpp" | ||||
| #include "config/config.hpp" | ||||
| #include "extract/extract.hpp" | ||||
| #include "extract/trans.hpp" | ||||
| 
 | ||||
| #include <vulkan/vulkan_core.h> | ||||
| #include <lsfg_3_1.hpp> | ||||
|  | @ -42,11 +41,8 @@ void Benchmark::run(uint32_t width, uint32_t height) { | |||
|     lsfgInitialize( | ||||
|         deviceUUID, // some magic number if not given
 | ||||
|         conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1, | ||||
|         [](const std::string& name) -> std::vector<uint8_t> { | ||||
|             auto dxbc = Extract::getShader(name); | ||||
|             auto spirv = Extract::translateShader(dxbc); | ||||
|             return spirv; | ||||
|         } | ||||
|         conf.no_fp16, | ||||
|         Extract::getShader | ||||
|     ); | ||||
|     const int32_t ctx = lsfgCreateContext(-1, -1, {}, | ||||
|         { .width = width, .height = height }, | ||||
|  |  | |||
|  | @ -209,20 +209,25 @@ void Utils::resetLimitN(const std::string& id) noexcept { | |||
| } | ||||
| 
 | ||||
| std::pair<std::string, std::string> Utils::getProcessName() { | ||||
|     // check override first
 | ||||
|     const char* process_name = std::getenv("LSFG_PROCESS"); | ||||
|     if (process_name && *process_name != '\0') | ||||
|         return { process_name, process_name }; | ||||
| 
 | ||||
|     // then check benchmark flag
 | ||||
|     const char* benchmark_flag = std::getenv("LSFG_BENCHMARK"); | ||||
|     if (benchmark_flag) | ||||
|         return { "benchmark", "benchmark" }; | ||||
|     std::array<char, 4096> exe{}; | ||||
| 
 | ||||
|     // find executed binary
 | ||||
|     const ssize_t exe_len = readlink("/proc/self/exe", exe.data(), exe.size() - 1); | ||||
|     if (exe_len <= 0) | ||||
|         return { "Unknown Process", "unknown" }; | ||||
|     exe.at(static_cast<size_t>(exe_len)) = '\0'; | ||||
|     std::string exe_str(exe.data()); | ||||
| 
 | ||||
|     // find command name as well
 | ||||
|     std::ifstream comm_file("/proc/self/comm"); | ||||
|     if (!comm_file.is_open()) | ||||
|         return { std::string(exe.data()), "unknown" }; | ||||
|  | @ -233,7 +238,37 @@ std::pair<std::string, std::string> Utils::getProcessName() { | |||
|     if (comm_str.back() == '\n') | ||||
|         comm_str.pop_back(); | ||||
| 
 | ||||
|     return{ std::string(exe.data()), comm_str }; | ||||
|     // replace binary with exe for wine apps
 | ||||
|     if (exe_str.find("wine") != std::string::npos | ||||
|         || exe_str.find("proton") != std::string::npos) { | ||||
| 
 | ||||
|         std::ifstream proc_maps("/proc/self/maps"); | ||||
|         if (!proc_maps.is_open()) | ||||
|             return{ exe_str, comm_str }; | ||||
| 
 | ||||
|         std::string line; | ||||
|         while (std::getline(proc_maps, line)) { | ||||
|             if (!line.ends_with(".exe")) | ||||
|                 continue; | ||||
| 
 | ||||
|             size_t pos = line.find_first_of('/'); | ||||
|             if (pos == std::string::npos) { | ||||
|                 pos = line.find_last_of(' '); | ||||
|                 if (pos == std::string::npos) | ||||
|                     continue; | ||||
|                 pos += 1; // skip space
 | ||||
|             } | ||||
| 
 | ||||
|             const std::string exe_name = line.substr(pos); | ||||
|             if (exe_name.empty()) | ||||
|                 continue; | ||||
| 
 | ||||
|             exe_str = exe_name; | ||||
|             break; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     return{ exe_str, comm_str }; | ||||
| } | ||||
| 
 | ||||
| std::string Utils::getConfigFile() { | ||||
|  |  | |||
|  | @ -4,7 +4,6 @@ | |||
| #include "core/image.hpp" | ||||
| #include "core/instance.hpp" | ||||
| #include "extract/extract.hpp" | ||||
| #include "extract/trans.hpp" | ||||
| 
 | ||||
| #include <vulkan/vulkan_core.h> | ||||
| 
 | ||||
|  | @ -35,7 +34,7 @@ const float FLOW_SCALE = 0.7F; | |||
| 
 | ||||
| // test configuration end
 | ||||
| 
 | ||||
| #ifdef PERFORMANCE_MODE | ||||
| #if PERFORMANCE_MODE | ||||
| #include "lsfg_3_1p.hpp" | ||||
| using namespace LSFG_3_1P; | ||||
| #else | ||||
|  | @ -75,11 +74,8 @@ namespace { | |||
|         initialize( | ||||
|             0x1463ABAC, | ||||
|             IS_HDR, 1.0F / FLOW_SCALE, MULTIPLIER - 1, | ||||
|             [](const std::string& name) -> std::vector<uint8_t> { | ||||
|                 auto dxbc = Extract::getShader(name); | ||||
|                 auto spirv = Extract::translateShader(dxbc); | ||||
|                 return spirv; | ||||
|             } | ||||
|             false, | ||||
|             Extract::getShader | ||||
|         ); | ||||
|         initializeRenderDoc(); | ||||
|         return createContext( | ||||
|  | @ -106,7 +102,7 @@ namespace { | |||
| int main() { | ||||
|     // initialize host Vulkan
 | ||||
|     const Core::Instance instance{}; | ||||
|     const Core::Device device{instance, 0x1463ABAC}; | ||||
|     const Core::Device device{instance, 0x1463ABAC, false}; | ||||
|     const Core::CommandPool commandPool{device}; | ||||
| 
 | ||||
|     // setup test
 | ||||
|  |  | |||
							
								
								
									
										1
									
								
								thirdparty/dxbc
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								thirdparty/dxbc
									
										
									
									
										vendored
									
									
								
							|  | @ -1 +0,0 @@ | |||
| Subproject commit 78ab59a8aaeb43cd1b0a5e91ba86722433a10b78 | ||||
|  | @ -39,6 +39,14 @@ | |||
|             <property name="icon-name">folder-symbolic</property> | ||||
|         </object> | ||||
|         </child> | ||||
|         <!--General Properties: FP16 Override --> | ||||
|         <child> | ||||
|         <object class="LSPrefSwitch" id="no_fp16"> | ||||
|             <property name="opt-name">Force-disable FP16</property> | ||||
|             <property name="opt-subtitle">(Global Option) Force-disable FP16 acceleration (use on older NVIDIA GPUs)</property> | ||||
|             <property name="default-state">false</property> | ||||
|         </object> | ||||
|         </child> | ||||
|         <!--General Properties: Profile name --> | ||||
|         <child> | ||||
|         <object class="LSPrefEntry" id="profile_name"> | ||||
|  |  | |||
|  | @ -30,6 +30,7 @@ pub fn default_config() -> TomlConfig { | |||
|         version: 1, | ||||
|         global: TomlGlobal { | ||||
|             dll: None, | ||||
|             no_fp16: false | ||||
|         }, | ||||
|         game: vec![ | ||||
|             TomlGame { | ||||
|  | @ -49,7 +50,7 @@ pub fn default_config() -> TomlConfig { | |||
|                 experimental_present_mode: PresentMode::Vsync, | ||||
|             }, | ||||
|             TomlGame { | ||||
|                 exe: String::from("Genshin"), | ||||
|                 exe: String::from("GenshinImpact.exe"), | ||||
|                 multiplier: Multiplier::from(3), | ||||
|                 flow_scale: FlowScale::from(1.0), | ||||
|                 performance_mode: false, | ||||
|  |  | |||
|  | @ -62,7 +62,9 @@ impl Into<u32> for PresentMode { | |||
| /// Global configuration for the application
 | ||||
| #[derive(Debug, Default, Clone, Deserialize, Serialize)] | ||||
| pub struct TomlGlobal { | ||||
|     pub dll: Option<String> | ||||
|     pub dll: Option<String>, | ||||
|     #[serde(default)] | ||||
|     pub no_fp16: bool | ||||
| } | ||||
| 
 | ||||
| /// Game-specific configuration
 | ||||
|  |  | |||
|  | @ -25,6 +25,7 @@ pub fn build(app: &adw::Application) { | |||
|     if let Some(dll_path) = config.global.dll { | ||||
|         imp.main.imp().dll.imp().entry.set_text(&dll_path); | ||||
|     } | ||||
|     imp.main.imp().no_fp16.imp().switch.set_active(config.global.no_fp16); | ||||
| 
 | ||||
|     // register handlers on sidebar pane.
 | ||||
|     sidebar_handler::register_signals(&imp.sidebar, imp.main.clone()); | ||||
|  |  | |||
|  | @ -94,6 +94,12 @@ pub fn register_signals(sidebar_: pane::PaneSidebar, main: &pane::PaneMain) { | |||
|             } | ||||
|         }); | ||||
|     }); | ||||
|     let no_fp16 = main.no_fp16.imp(); | ||||
|     no_fp16.switch.connect_state_notify(|switch| { | ||||
|         let _ = config::edit_config(|config| { | ||||
|             config.global.no_fp16 = switch.state(); | ||||
|         }); | ||||
|     }); | ||||
| 
 | ||||
|     // utility buttons
 | ||||
|     let entry = dll.entry.clone(); | ||||
|  |  | |||
|  | @ -18,11 +18,28 @@ pub fn find_vulkan_processes() -> ProcResult<Vec<(String, String)>> { | |||
|             continue; | ||||
|         } | ||||
| 
 | ||||
|         // find executed binary
 | ||||
|         let mut exe = prc.exe()?.to_string_lossy().to_string(); | ||||
| 
 | ||||
|         // replace binary with exe for wine apps
 | ||||
|         if exe.contains("wine") || exe.contains("proton") { | ||||
|             let result = maps.iter() | ||||
|                 .filter_map(|map| map.filename()) | ||||
|                 .map(|filename| filename.to_string_lossy().to_string()) | ||||
|                 .find(|filename| filename.ends_with(".exe")); | ||||
| 
 | ||||
|             if let Some(exe_name) = result { | ||||
|                 exe = exe_name; | ||||
|             } | ||||
|         } | ||||
| 
 | ||||
|         // split off last part of the path
 | ||||
|         exe = exe.split('/').last().unwrap_or(&exe).to_string(); | ||||
| 
 | ||||
|         // format process information
 | ||||
|         let pid = prc.pid(); | ||||
|         let name = prc.stat()?.comm; | ||||
|         let process_info = format!("PID {}: {}", pid, name); | ||||
|         processes.push((process_info, name)); | ||||
|         let process_info = format!("PID {}: {}", pid, exe); | ||||
|         processes.push((process_info, exe)); | ||||
|     } | ||||
| 
 | ||||
|     Ok(processes) | ||||
|  |  | |||
|  | @ -9,6 +9,8 @@ pub struct PaneMain { | |||
|     #[template_child] | ||||
|     pub dll: TemplateChild<PrefEntry>, | ||||
|     #[template_child] | ||||
|     pub no_fp16: TemplateChild<PrefSwitch>, | ||||
|     #[template_child] | ||||
|     pub profile_name: TemplateChild<PrefEntry>, | ||||
|     #[template_child] | ||||
|     pub multiplier: TemplateChild<PrefNumber>, | ||||
|  |  | |||
		Loading…
	
	Add table
		
		Reference in a new issue