From 73e09afcf4b3042de734fca99693c9d9bbefef69 Mon Sep 17 00:00:00 2001 From: PancakeTAS Date: Tue, 5 Aug 2025 01:58:39 +0200 Subject: [PATCH] feat(fp16): adding all shaders --- framegen/v3.1p_src/shaders/generate.cpp | 4 +- framegen/v3.1p_src/shaders/mipmaps.cpp | 4 +- src/extract/extract.cpp | 122 ++++++++---------------- 3 files changed, 43 insertions(+), 87 deletions(-) diff --git a/framegen/v3.1p_src/shaders/generate.cpp b/framegen/v3.1p_src/shaders/generate.cpp index 5f511da..6c84945 100644 --- a/framegen/v3.1p_src/shaders/generate.cpp +++ b/framegen/v3.1p_src/shaders/generate.cpp @@ -21,12 +21,12 @@ Generate::Generate(Vulkan& vk, inImg3(std::move(inImg3)), inImg4(std::move(inImg4)), inImg5(std::move(inImg5)) { // create resources - this->shaderModule = vk.shaders.getShader(vk.device, "p_generate", + this->shaderModule = vk.shaders.getShader(vk.device, "generate", { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER }, { 2, VK_DESCRIPTOR_TYPE_SAMPLER }, { 5, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, { 1, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }); - this->pipeline = vk.shaders.getPipeline(vk.device, "p_generate"); + this->pipeline = vk.shaders.getPipeline(vk.device, "generate"); this->samplers.at(0) = vk.resources.getSampler(vk.device); this->samplers.at(1) = vk.resources.getSampler(vk.device, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, VK_COMPARE_OP_ALWAYS); diff --git a/framegen/v3.1p_src/shaders/mipmaps.cpp b/framegen/v3.1p_src/shaders/mipmaps.cpp index 531b34f..7d4b4a6 100644 --- a/framegen/v3.1p_src/shaders/mipmaps.cpp +++ b/framegen/v3.1p_src/shaders/mipmaps.cpp @@ -16,12 +16,12 @@ Mipmaps::Mipmaps(Vulkan& vk, Core::Image inImg_0, Core::Image inImg_1) : inImg_0(std::move(inImg_0)), inImg_1(std::move(inImg_1)) { // create resources - this->shaderModule = vk.shaders.getShader(vk.device, "p_mipmaps", + this->shaderModule = vk.shaders.getShader(vk.device, "mipmaps", { { 1, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER }, { 1, VK_DESCRIPTOR_TYPE_SAMPLER }, { 1, VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE }, { 7, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE } }); - this->pipeline = vk.shaders.getPipeline(vk.device, "p_mipmaps"); + this->pipeline = vk.shaders.getPipeline(vk.device, "mipmaps"); this->buffer = vk.resources.getBuffer(vk.device); this->sampler = vk.resources.getSampler(vk.device); for (size_t i = 0; i < 2; i++) diff --git a/src/extract/extract.cpp b/src/extract/extract.cpp index ecbb35f..99f99a9 100644 --- a/src/extract/extract.cpp +++ b/src/extract/extract.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -20,6 +19,8 @@ using namespace Extract; const uint32_t NO = 49; // native offset +const uint32_t PO = NO + 23; // performance+native offset +const uint32_t FP = 49; // fp32 offset const std::unordered_map nameIdxTable = {{ { "mipmaps", 255 + NO }, { "alpha[0]", 267 + NO }, @@ -46,7 +47,31 @@ const std::unordered_map nameIdxTable = {{ { "delta[7]", 272 + NO }, { "delta[8]", 273 + NO }, { "delta[9]", 274 + NO }, - { "generate", 256 + NO } + { "generate", 256 + NO }, + { "p_alpha[0]", 267 + PO }, + { "p_alpha[1]", 268 + PO }, + { "p_alpha[2]", 269 + PO }, + { "p_alpha[3]", 270 + PO }, + { "p_beta[0]", 275 + PO }, + { "p_beta[1]", 276 + PO }, + { "p_beta[2]", 277 + PO }, + { "p_beta[3]", 278 + PO }, + { "p_beta[4]", 279 + PO }, + { "p_gamma[0]", 257 + PO }, + { "p_gamma[1]", 259 + PO }, + { "p_gamma[2]", 260 + PO }, + { "p_gamma[3]", 261 + PO }, + { "p_gamma[4]", 262 + PO }, + { "p_delta[0]", 257 + PO }, + { "p_delta[1]", 263 + PO }, + { "p_delta[2]", 264 + PO }, + { "p_delta[3]", 265 + PO }, + { "p_delta[4]", 266 + PO }, + { "p_delta[5]", 258 + PO }, + { "p_delta[6]", 271 + PO }, + { "p_delta[7]", 272 + PO }, + { "p_delta[8]", 273 + PO }, + { "p_delta[9]", 274 + PO }, }}; namespace { @@ -95,81 +120,6 @@ namespace { // final fallback return "Lossless.dll"; } - - std::array, 2> fixShaders(const std::vector& spirv) { - std::vector shader(spirv.size() / 4); - std::copy_n(spirv.data(), spirv.size(), reinterpret_cast(shader.data())); - - // patch bindings - std::vector samplerOffsets{}; - std::vector sampledImageOffsets{}; - std::vector storageImageOffsets{}; - std::vector uniformBufferOffsets{}; - - uint32_t prevIdx{ 0 }; - uint32_t type{ 0 }; - - size_t i{ 5 }; - while (i < shader.size()) { - const uint32_t word = shader[i]; - const uint16_t op = word & 0xFFFF; - const uint16_t len = word >> 16; - if (op == 71 /*spv::OpDecorate*/) { - const uint32_t decoration = shader[i + 2]; - if (decoration == 33 /*spv::DecorationBinding*/) { - const uint32_t idx = shader[i + 3]; - if (idx <= prevIdx) - type++; - prevIdx = idx; - - switch (type) { - case 1: - samplerOffsets.emplace_back(i + 3); - break; - case 2: - sampledImageOffsets.emplace_back(i + 3); - break; - case 3: - storageImageOffsets.emplace_back(i + 3); - break; - case 4: - uniformBufferOffsets.emplace_back(i + 3); - break; - default: - break; - } - } - } - - if (op == 54 /*spv::OpFunction*/) - break; - - i += len ? len : 1; - } - - uint32_t binding{ 0 }; - for (const auto& idx : uniformBufferOffsets) - shader[idx] = binding++; - for (const auto& idx : samplerOffsets) - shader[idx] = binding++; - for (const auto& idx : sampledImageOffsets) - shader[idx] = binding++; - for (const auto& idx : storageImageOffsets) - shader[idx] = binding++; - - std::vector result_fp32(shader.size() * sizeof(uint32_t)); - std::copy_n(reinterpret_cast(shader.data()), - result_fp32.size(), result_fp32.data()); - - spvtools::Optimizer optimizer(SPV_ENV_VULKAN_1_3); - optimizer.RegisterPass(spvtools::CreateConvertRelaxedToHalfPass()); - optimizer.Run(shader.data(), shader.size() * sizeof(uint32_t), &shader); - - std::vector result_fp16(shader.size() * sizeof(uint32_t)); - std::copy_n(reinterpret_cast(shader.data()), - result_fp16.size(), result_fp16.data()); - return { std::move(result_fp32), std::move(result_fp16) }; - } } void Extract::extractShaders() { @@ -186,13 +136,19 @@ void Extract::extractShaders() { peparse::DestructParsedPE(dll); // ensure all shaders are present - for (const auto& [name, idx] : nameIdxTable) - if (shaders.find(idx) == shaders.end()) - throw std::runtime_error("Shader not found: " + name + ".\n- Is Lossless Scaling up to date?"); + for (const auto& [name, idx] : nameIdxTable) { + auto fp16 = shaders.find(idx); + if (fp16 == shaders.end()) + throw std::runtime_error("Shader not found: " + name + " (FP16).\n- Is Lossless Scaling up to date?"); + auto fp32 = shaders.find(idx + FP); + if (fp32 == shaders.end()) + throw std::runtime_error("Shader not found: " + name + " (FP32).\n- Is Lossless Scaling up to date?"); - // fix shader bytecode - for (auto& [idx, data] : shaders) - pshaders()[idx] = fixShaders(data); + pshaders().emplace(idx, std::array, 2>{ + std::move(fp32->second), + std::move(fp16->second) + }); + } } std::vector Extract::getShader(const std::string& name, bool fp16) {