From cb234bde746531f99165e100f2a9d695892a382d Mon Sep 17 00:00:00 2001 From: PancakeTAS Date: Wed, 30 Jul 2025 18:59:22 +0200 Subject: [PATCH] feat(fp16): translate shaders to fp16 this will not work with the steam version, nor is it deployable --- CMakeLists.txt | 2 +- include/extract/extract.hpp | 3 +- src/context.cpp | 7 +- src/extract/extract.cpp | 198 +++++++++++++++++++++++------------- src/utils/benchmark.cpp | 7 +- 5 files changed, 135 insertions(+), 82 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index fa83c32..fa9d352 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,7 +45,7 @@ set_target_properties(lsfg-vk PROPERTIES target_include_directories(lsfg-vk PUBLIC include) target_link_libraries(lsfg-vk PUBLIC - pe-parse dxbc toml11 + pe-parse dxbc toml11 SPIRV-Tools-opt lsfg-vk-framegen) get_target_property(TOML11_INCLUDE_DIRS toml11 INTERFACE_INCLUDE_DIRECTORIES) diff --git a/include/extract/extract.hpp b/include/extract/extract.hpp index 812a2bd..45465c7 100644 --- a/include/extract/extract.hpp +++ b/include/extract/extract.hpp @@ -17,10 +17,11 @@ namespace Extract { /// Get a shader by name. /// /// @param name The name of the shader to get. + /// @param fp16 If true, use the FP16 variant of shaders. /// @return The shader bytecode. /// /// @throws std::runtime_error if the shader is not found. /// - std::vector getShader(const std::string& name); + std::vector getShader(const std::string& name, bool fp16); } diff --git a/src/context.cpp b/src/context.cpp index e00804a..aa55da3 100644 --- a/src/context.cpp +++ b/src/context.cpp @@ -2,7 +2,6 @@ #include "config/config.hpp" #include "common/exception.hpp" #include "extract/extract.hpp" -#include "extract/trans.hpp" #include "utils/utils.hpp" #include "hooks.hpp" #include "layer.hpp" @@ -99,11 +98,7 @@ LsContext::LsContext(const Hooks::DeviceInfo& info, VkSwapchainKHR swapchain, lsfgInitialize( Utils::getDeviceUUID(info.physicalDevice), conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1, - [](const std::string& name) { - auto dxbc = Extract::getShader(name); - auto spirv = Extract::translateShader(dxbc); - return spirv; - } + Extract::getShader ); this->lsfgCtxId = std::shared_ptr( diff --git a/src/extract/extract.cpp b/src/extract/extract.cpp index 35d1635..ecbb35f 100644 --- a/src/extract/extract.cpp +++ b/src/extract/extract.cpp @@ -1,86 +1,68 @@ #include "extract/extract.hpp" #include "config/config.hpp" +#include +#include #include -#include +#include #include #include -#include #include +#include +#include +#include #include -#include +#include #include +#include using namespace Extract; +const uint32_t NO = 49; // native offset const std::unordered_map nameIdxTable = {{ - { "mipmaps", 255 }, - { "alpha[0]", 267 }, - { "alpha[1]", 268 }, - { "alpha[2]", 269 }, - { "alpha[3]", 270 }, - { "beta[0]", 275 }, - { "beta[1]", 276 }, - { "beta[2]", 277 }, - { "beta[3]", 278 }, - { "beta[4]", 279 }, - { "gamma[0]", 257 }, - { "gamma[1]", 259 }, - { "gamma[2]", 260 }, - { "gamma[3]", 261 }, - { "gamma[4]", 262 }, - { "delta[0]", 257 }, - { "delta[1]", 263 }, - { "delta[2]", 264 }, - { "delta[3]", 265 }, - { "delta[4]", 266 }, - { "delta[5]", 258 }, - { "delta[6]", 271 }, - { "delta[7]", 272 }, - { "delta[8]", 273 }, - { "delta[9]", 274 }, - { "generate", 256 }, - { "p_mipmaps", 255 }, - { "p_alpha[0]", 290 }, - { "p_alpha[1]", 291 }, - { "p_alpha[2]", 292 }, - { "p_alpha[3]", 293 }, - { "p_beta[0]", 298 }, - { "p_beta[1]", 299 }, - { "p_beta[2]", 300 }, - { "p_beta[3]", 301 }, - { "p_beta[4]", 302 }, - { "p_gamma[0]", 280 }, - { "p_gamma[1]", 282 }, - { "p_gamma[2]", 283 }, - { "p_gamma[3]", 284 }, - { "p_gamma[4]", 285 }, - { "p_delta[0]", 280 }, - { "p_delta[1]", 286 }, - { "p_delta[2]", 287 }, - { "p_delta[3]", 288 }, - { "p_delta[4]", 289 }, - { "p_delta[5]", 281 }, - { "p_delta[6]", 294 }, - { "p_delta[7]", 295 }, - { "p_delta[8]", 296 }, - { "p_delta[9]", 297 }, - { "p_generate", 256 }, + { "mipmaps", 255 + NO }, + { "alpha[0]", 267 + NO }, + { "alpha[1]", 268 + NO }, + { "alpha[2]", 269 + NO }, + { "alpha[3]", 270 + NO }, + { "beta[0]", 275 + NO }, + { "beta[1]", 276 + NO }, + { "beta[2]", 277 + NO }, + { "beta[3]", 278 + NO }, + { "beta[4]", 279 + NO }, + { "gamma[0]", 257 + NO }, + { "gamma[1]", 259 + NO }, + { "gamma[2]", 260 + NO }, + { "gamma[3]", 261 + NO }, + { "gamma[4]", 262 + NO }, + { "delta[0]", 257 + NO }, + { "delta[1]", 263 + NO }, + { "delta[2]", 264 + NO }, + { "delta[3]", 265 + NO }, + { "delta[4]", 266 + NO }, + { "delta[5]", 258 + NO }, + { "delta[6]", 271 + NO }, + { "delta[7]", 272 + NO }, + { "delta[8]", 273 + NO }, + { "delta[9]", 274 + NO }, + { "generate", 256 + NO } }}; namespace { - auto& shaders() { - static std::unordered_map> shaderData; + auto& pshaders() { + static std::unordered_map, 2>> shaderData; return shaderData; } - int on_resource(void*, const peparse::resource& res) { + int on_resource(void* ptr, const peparse::resource& res) { if (res.type != peparse::RT_RCDATA || res.buf == nullptr || res.buf->bufLen <= 0) return 0; std::vector resource_data(res.buf->bufLen); std::copy_n(res.buf->buf, res.buf->bufLen, resource_data.data()); - shaders()[res.name] = resource_data; + + auto* shaders = reinterpret_cast>*>(ptr); + shaders->emplace(res.name, std::move(resource_data)); return 0; } @@ -113,36 +95,116 @@ namespace { // final fallback return "Lossless.dll"; } + + std::array, 2> fixShaders(const std::vector& spirv) { + std::vector shader(spirv.size() / 4); + std::copy_n(spirv.data(), spirv.size(), reinterpret_cast(shader.data())); + + // patch bindings + std::vector samplerOffsets{}; + std::vector sampledImageOffsets{}; + std::vector storageImageOffsets{}; + std::vector uniformBufferOffsets{}; + + uint32_t prevIdx{ 0 }; + uint32_t type{ 0 }; + + size_t i{ 5 }; + while (i < shader.size()) { + const uint32_t word = shader[i]; + const uint16_t op = word & 0xFFFF; + const uint16_t len = word >> 16; + if (op == 71 /*spv::OpDecorate*/) { + const uint32_t decoration = shader[i + 2]; + if (decoration == 33 /*spv::DecorationBinding*/) { + const uint32_t idx = shader[i + 3]; + if (idx <= prevIdx) + type++; + prevIdx = idx; + + switch (type) { + case 1: + samplerOffsets.emplace_back(i + 3); + break; + case 2: + sampledImageOffsets.emplace_back(i + 3); + break; + case 3: + storageImageOffsets.emplace_back(i + 3); + break; + case 4: + uniformBufferOffsets.emplace_back(i + 3); + break; + default: + break; + } + } + } + + if (op == 54 /*spv::OpFunction*/) + break; + + i += len ? len : 1; + } + + uint32_t binding{ 0 }; + for (const auto& idx : uniformBufferOffsets) + shader[idx] = binding++; + for (const auto& idx : samplerOffsets) + shader[idx] = binding++; + for (const auto& idx : sampledImageOffsets) + shader[idx] = binding++; + for (const auto& idx : storageImageOffsets) + shader[idx] = binding++; + + std::vector result_fp32(shader.size() * sizeof(uint32_t)); + std::copy_n(reinterpret_cast(shader.data()), + result_fp32.size(), result_fp32.data()); + + spvtools::Optimizer optimizer(SPV_ENV_VULKAN_1_3); + optimizer.RegisterPass(spvtools::CreateConvertRelaxedToHalfPass()); + optimizer.Run(shader.data(), shader.size() * sizeof(uint32_t), &shader); + + std::vector result_fp16(shader.size() * sizeof(uint32_t)); + std::copy_n(reinterpret_cast(shader.data()), + result_fp16.size(), result_fp16.data()); + return { std::move(result_fp32), std::move(result_fp16) }; + } } void Extract::extractShaders() { - if (!shaders().empty()) + if (!pshaders().empty()) return; + std::unordered_map> shaders{}; + // parse the dll peparse::parsed_pe* dll = peparse::ParsePEFromFile(getDllPath().c_str()); if (!dll) throw std::runtime_error("Unable to read Lossless.dll, is it installed?"); - peparse::IterRsrc(dll, on_resource, nullptr); + peparse::IterRsrc(dll, on_resource, reinterpret_cast(&shaders)); peparse::DestructParsedPE(dll); // ensure all shaders are present for (const auto& [name, idx] : nameIdxTable) - if (shaders().find(idx) == shaders().end()) + if (shaders.find(idx) == shaders.end()) throw std::runtime_error("Shader not found: " + name + ".\n- Is Lossless Scaling up to date?"); + + // fix shader bytecode + for (auto& [idx, data] : shaders) + pshaders()[idx] = fixShaders(data); } -std::vector Extract::getShader(const std::string& name) { - if (shaders().empty()) +std::vector Extract::getShader(const std::string& name, bool fp16) { + if (pshaders().empty()) throw std::runtime_error("Shaders are not loaded."); auto hit = nameIdxTable.find(name); if (hit == nameIdxTable.end()) throw std::runtime_error("Shader hash not found: " + name); - auto sit = shaders().find(hit->second); - if (sit == shaders().end()) + auto sit = pshaders().find(hit->second); + if (sit == pshaders().end()) throw std::runtime_error("Shader not found: " + name); - - return sit->second; + return fp16 ? sit->second.at(1) : sit->second.at(0); } diff --git a/src/utils/benchmark.cpp b/src/utils/benchmark.cpp index 0f0d0fd..444663c 100644 --- a/src/utils/benchmark.cpp +++ b/src/utils/benchmark.cpp @@ -1,7 +1,6 @@ #include "utils/benchmark.hpp" #include "config/config.hpp" #include "extract/extract.hpp" -#include "extract/trans.hpp" #include #include @@ -42,11 +41,7 @@ void Benchmark::run(uint32_t width, uint32_t height) { lsfgInitialize( deviceUUID, // some magic number if not given conf.hdr, 1.0F / conf.flowScale, conf.multiplier - 1, - [](const std::string& name) -> std::vector { - auto dxbc = Extract::getShader(name); - auto spirv = Extract::translateShader(dxbc); - return spirv; - } + Extract::getShader ); const int32_t ctx = lsfgCreateContext(-1, -1, {}, { .width = width, .height = height },